/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import org.apache.kafka.clients.producer.internals.DefaultPartitioner;
import org.apache.kafka.common.serialization.Serde;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.common.utils.Bytes;
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.Topology;
import org.apache.kafka.streams.processor.ConnectedStoreProvider;
import org.apache.kafka.streams.processor.api.Processor;
import org.apache.kafka.streams.processor.api.ProcessorSupplier;
import org.apache.kafka.streams.processor.api.ProcessorContext;
import org.apache.kafka.streams.processor.StreamPartitioner;
import org.apache.kafka.streams.processor.TopicNameExtractor;
import org.apache.kafka.streams.state.KeyValueStore;
import org.apache.kafka.streams.state.StoreBuilder;
/**
* {@code KStream} is an abstraction of a record stream of {@link KeyValue} pairs, i.e., each record is an
* independent entity/event in the real world.
* For example a user X might buy two items I1 and I2, and thus there might be two records {@code <K:I1>, <K:I2>}
* in the stream.
*
* A {@code KStream} is either {@link StreamsBuilder#stream(String) defined from one or multiple Kafka topics} that
* are consumed message by message or the result of a {@code KStream} transformation.
* A {@link KTable} can also be {@link KTable#toStream() converted} into a {@code KStream}.
*
* A {@code KStream} can be transformed record by record, joined with another {@code KStream}, {@link KTable},
* {@link GlobalKTable}, or can be aggregated into a {@link KTable}.
* Kafka Streams DSL can be mixed-and-matched with Processor API (PAPI) (c.f. {@link Topology}) via
* {@link #process(ProcessorSupplier, String...) process(...)},
* {@link #transform(TransformerSupplier, String...) transform(...)}, and
* {@link #transformValues(ValueTransformerSupplier, String...) transformValues(...)}.
*
* @param <K> Type of keys
* @param <V> Type of values
* @see KTable
* @see KGroupedStream
* @see StreamsBuilder#stream(String)
*/
public interface KStream<K, V> {
/**
* Create a new {@code KStream} that consists of all records of this stream which satisfy the given predicate.
* All records that do not satisfy the predicate are dropped.
* This is a stateless record-by-record operation.
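*
* For example, a minimal sketch (assuming a String/Long stream; topic and variable names are illustrative) that keeps only records with a positive value:
* {@code
* KStream<String, Long> stream = builder.stream("input-topic");
* KStream<String, Long> positives = stream.filter((key, value) -> value > 0);
* }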
*
* @param predicate a filter {@link Predicate} that is applied to each record
* @return a {@code KStream} that contains only those records that satisfy the given predicate
* @see #filterNot(Predicate)
*/
KStream<K, V> filter(final Predicate<? super K, ? super V> predicate);
/**
* Create a new {@code KStream} that consists of all records of this stream which satisfy the given predicate.
* All records that do not satisfy the predicate are dropped.
* This is a stateless record-by-record operation.
*
* @param predicate a filter {@link Predicate} that is applied to each record
* @param named a {@link Named} config used to name the processor in the topology
* @return a {@code KStream} that contains only those records that satisfy the given predicate
* @see #filterNot(Predicate)
*/
KStream<K, V> filter(final Predicate<? super K, ? super V> predicate, final Named named);
/**
* Create a new {@code KStream} that consists of all records of this stream which do not satisfy the given
* predicate.
* All records that do satisfy the predicate are dropped.
* This is a stateless record-by-record operation.
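*
* For example, a minimal sketch (assuming the same String/Long stream as above) that drops records with a {@code null} value:
* {@code
* KStream<String, Long> nonNull = stream.filterNot((key, value) -> value == null);
* }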
*
* @param predicate a filter {@link Predicate} that is applied to each record
* @return a {@code KStream} that contains only those records that do not satisfy the given predicate
* @see #filter(Predicate)
*/
KStream<K, V> filterNot(final Predicate<? super K, ? super V> predicate);
/**
* Create a new {@code KStream} that consists of all records of this stream which do not satisfy the given
* predicate.
* All records that do satisfy the predicate are dropped.
* This is a stateless record-by-record operation.
*
* @param predicate a filter {@link Predicate} that is applied to each record
* @param named a {@link Named} config used to name the processor in the topology
* @return a {@code KStream} that contains only those records that do not satisfy the given predicate
* @see #filter(Predicate)
*/
KStream<K, V> filterNot(final Predicate<? super K, ? super V> predicate, final Named named);
/**
* Set a new key (with possibly new type) for each input record.
* The provided {@link KeyValueMapper} is applied to each input record and computes a new key for it.
* Thus, an input record {@code <K,V>} can be transformed into an output record {@code <K':V>}.
* This is a stateless record-by-record operation.
*
* For example, you can use this transformation to set a key for a key-less input record {@code <null,V>} by
* extracting a key from the value within your {@link KeyValueMapper}. The example below computes the new key as the
* length of the value string.
* {@code
* KStream<Byte[], String> keyLessStream = builder.stream("key-less-topic");
* KStream<Integer, String> keyedStream = keyLessStream.selectKey(new KeyValueMapper<Byte[], String, Integer>() {
*     Integer apply(Byte[] key, String value) {
*         return value.length();
*     }
* });
* }
* Setting a new key might result in an internal data redistribution if a key based operator (like an aggregation or
* join) is applied to the result {@code KStream}.
*
* @param mapper a {@link KeyValueMapper} that computes a new key for each record
* @param <KR> the new key type of the result stream
* @return a {@code KStream} that contains records with new key (possibly of different type) and unmodified value
* @see #map(KeyValueMapper)
* @see #flatMap(KeyValueMapper)
* @see #mapValues(ValueMapper)
* @see #mapValues(ValueMapperWithKey)
* @see #flatMapValues(ValueMapper)
* @see #flatMapValues(ValueMapperWithKey)
*/
<KR> KStream<KR, V> selectKey(final KeyValueMapper<? super K, ? super V, ? extends KR> mapper);
/**
* Set a new key (with possibly new type) for each input record.
* The provided {@link KeyValueMapper} is applied to each input record and computes a new key for it.
* Thus, an input record {@code <K,V>} can be transformed into an output record {@code <K':V>}.
* This is a stateless record-by-record operation.
*
* For example, you can use this transformation to set a key for a key-less input record {@code <null,V>} by
* extracting a key from the value within your {@link KeyValueMapper}. The example below computes the new key as the
* length of the value string.
* {@code
* KStream<Byte[], String> keyLessStream = builder.stream("key-less-topic");
* KStream<Integer, String> keyedStream = keyLessStream.selectKey(new KeyValueMapper<Byte[], String, Integer>() {
*     Integer apply(Byte[] key, String value) {
*         return value.length();
*     }
* });
* }
* Setting a new key might result in an internal data redistribution if a key based operator (like an aggregation or
* join) is applied to the result {@code KStream}.
*
* @param mapper a {@link KeyValueMapper} that computes a new key for each record
* @param named a {@link Named} config used to name the processor in the topology
* @param <KR> the new key type of the result stream
* @return a {@code KStream} that contains records with new key (possibly of different type) and unmodified value
* @see #map(KeyValueMapper)
* @see #flatMap(KeyValueMapper)
* @see #mapValues(ValueMapper)
* @see #mapValues(ValueMapperWithKey)
* @see #flatMapValues(ValueMapper)
* @see #flatMapValues(ValueMapperWithKey)
*/
<KR> KStream<KR, V> selectKey(final KeyValueMapper<? super K, ? super V, ? extends KR> mapper,
                              final Named named);
/**
* Transform each record of the input stream into a new record in the output stream (both key and value type can be
* altered arbitrarily).
* The provided {@link KeyValueMapper} is applied to each input record and computes a new output record.
* Thus, an input record {@code <K,V>} can be transformed into an output record {@code <K':V'>}.
* This is a stateless record-by-record operation (cf. {@link #transform(TransformerSupplier, String...)} for
* stateful record transformation).
*
* The example below normalizes the String key to upper-case letters and counts the number of tokens of the value string.
*
* {@code
* KStream<String, String> inputStream = builder.stream("topic");
* KStream<String, Integer> outputStream = inputStream.map(new KeyValueMapper<String, String, KeyValue<String, Integer>>() {
*     KeyValue<String, Integer> apply(String key, String value) {
*         return new KeyValue<>(key.toUpperCase(), value.split(" ").length);
*     }
* });
* }
* The provided {@link KeyValueMapper} must return a {@link KeyValue} type and must not return {@code null}.
*
* Mapping records might result in an internal data redistribution if a key based operator (like an aggregation or
* join) is applied to the result {@code KStream}. (cf. {@link #mapValues(ValueMapper)})
*
* @param mapper a {@link KeyValueMapper} that computes a new output record
* @param <KR> the key type of the result stream
* @param <VR> the value type of the result stream
* @return a {@code KStream} that contains records with new key and value (possibly both of different type)
* @see #selectKey(KeyValueMapper)
* @see #flatMap(KeyValueMapper)
* @see #mapValues(ValueMapper)
* @see #mapValues(ValueMapperWithKey)
* @see #flatMapValues(ValueMapper)
* @see #flatMapValues(ValueMapperWithKey)
* @see #transform(TransformerSupplier, String...)
* @see #transformValues(ValueTransformerSupplier, String...)
* @see #transformValues(ValueTransformerWithKeySupplier, String...)
*/
<KR, VR> KStream<KR, VR> map(final KeyValueMapper<? super K, ? super V, ? extends KeyValue<? extends KR, ? extends VR>> mapper);
/**
* Transform each record of the input stream into a new record in the output stream (both key and value type can be
* altered arbitrarily).
* The provided {@link KeyValueMapper} is applied to each input record and computes a new output record.
* Thus, an input record {@code <K,V>} can be transformed into an output record {@code <K':V'>}.
* This is a stateless record-by-record operation (cf. {@link #transform(TransformerSupplier, String...)} for
* stateful record transformation).
*
* The example below normalizes the String key to upper-case letters and counts the number of tokens of the value string.
*
* {@code
* KStream<String, String> inputStream = builder.stream("topic");
* KStream<String, Integer> outputStream = inputStream.map(new KeyValueMapper<String, String, KeyValue<String, Integer>>() {
*     KeyValue<String, Integer> apply(String key, String value) {
*         return new KeyValue<>(key.toUpperCase(), value.split(" ").length);
*     }
* });
* }
* The provided {@link KeyValueMapper} must return a {@link KeyValue} type and must not return {@code null}.
*
* Mapping records might result in an internal data redistribution if a key based operator (like an aggregation or
* join) is applied to the result {@code KStream}. (cf. {@link #mapValues(ValueMapper)})
*
* @param mapper a {@link KeyValueMapper} that computes a new output record
* @param named a {@link Named} config used to name the processor in the topology
* @param <KR> the key type of the result stream
* @param <VR> the value type of the result stream
* @return a {@code KStream} that contains records with new key and value (possibly both of different type)
* @see #selectKey(KeyValueMapper)
* @see #flatMap(KeyValueMapper)
* @see #mapValues(ValueMapper)
* @see #mapValues(ValueMapperWithKey)
* @see #flatMapValues(ValueMapper)
* @see #flatMapValues(ValueMapperWithKey)
* @see #transform(TransformerSupplier, String...)
* @see #transformValues(ValueTransformerSupplier, String...)
* @see #transformValues(ValueTransformerWithKeySupplier, String...)
*/
<KR, VR> KStream<KR, VR> map(final KeyValueMapper<? super K, ? super V, ? extends KeyValue<? extends KR, ? extends VR>> mapper,
                             final Named named);
/**
* Transform the value of each input record into a new value (with possible new type) of the output record.
* The provided {@link ValueMapper} is applied to each input record value and computes a new value for it.
* Thus, an input record {@code <K,V>} can be transformed into an output record {@code <K:V'>}.
* This is a stateless record-by-record operation (cf.
* {@link #transformValues(ValueTransformerSupplier, String...)} for stateful value transformation).
*
* The example below counts the number of tokens of the value string.
*
* {@code
* KStream<String, String> inputStream = builder.stream("topic");
* KStream<String, Integer> outputStream = inputStream.mapValues(new ValueMapper<String, Integer>() {
*     Integer apply(String value) {
*         return value.split(" ").length;
*     }
* });
* }
* Setting a new value preserves data co-location with respect to the key.
* Thus, no internal data redistribution is required if a key based operator (like an aggregation or join)
* is applied to the result {@code KStream}. (cf. {@link #map(KeyValueMapper)})
*
* @param mapper a {@link ValueMapper} that computes a new output value
* @param <VR> the value type of the result stream
* @return a {@code KStream} that contains records with unmodified key and new values (possibly of different type)
* @see #selectKey(KeyValueMapper)
* @see #map(KeyValueMapper)
* @see #flatMap(KeyValueMapper)
* @see #flatMapValues(ValueMapper)
* @see #flatMapValues(ValueMapperWithKey)
* @see #transform(TransformerSupplier, String...)
* @see #transformValues(ValueTransformerSupplier, String...)
* @see #transformValues(ValueTransformerWithKeySupplier, String...)
*/
<VR> KStream<K, VR> mapValues(final ValueMapper<? super V, ? extends VR> mapper);
/**
* Transform the value of each input record into a new value (with possible new type) of the output record.
* The provided {@link ValueMapper} is applied to each input record value and computes a new value for it.
* Thus, an input record {@code <K,V>} can be transformed into an output record {@code <K:V'>}.
* This is a stateless record-by-record operation (cf.
* {@link #transformValues(ValueTransformerSupplier, String...)} for stateful value transformation).
*
* The example below counts the number of tokens of the value string.
*
* {@code
* KStream<String, String> inputStream = builder.stream("topic");
* KStream<String, Integer> outputStream = inputStream.mapValues(new ValueMapper<String, Integer>() {
*     Integer apply(String value) {
*         return value.split(" ").length;
*     }
* });
* }
* Setting a new value preserves data co-location with respect to the key.
* Thus, no internal data redistribution is required if a key based operator (like an aggregation or join)
* is applied to the result {@code KStream}. (cf. {@link #map(KeyValueMapper)})
*
* @param mapper a {@link ValueMapper} that computes a new output value
* @param named a {@link Named} config used to name the processor in the topology
* @param <VR> the value type of the result stream
* @return a {@code KStream} that contains records with unmodified key and new values (possibly of different type)
* @see #selectKey(KeyValueMapper)
* @see #map(KeyValueMapper)
* @see #flatMap(KeyValueMapper)
* @see #flatMapValues(ValueMapper)
* @see #flatMapValues(ValueMapperWithKey)
* @see #transform(TransformerSupplier, String...)
* @see #transformValues(ValueTransformerSupplier, String...)
* @see #transformValues(ValueTransformerWithKeySupplier, String...)
*/
<VR> KStream<K, VR> mapValues(final ValueMapper<? super V, ? extends VR> mapper,
                              final Named named);
/**
* Transform the value of each input record into a new value (with possible new type) of the output record.
* The provided {@link ValueMapperWithKey} is applied to each input record value and computes a new value for it.
* Thus, an input record {@code <K,V>} can be transformed into an output record {@code <K:V'>}.
* This is a stateless record-by-record operation (cf.
* {@link #transformValues(ValueTransformerWithKeySupplier, String...)} for stateful value transformation).
*
* The example below counts the number of tokens of key and value strings.
*
* {@code
* KStream<String, String> inputStream = builder.stream("topic");
* KStream<String, Integer> outputStream = inputStream.mapValues(new ValueMapperWithKey<String, String, Integer>() {
*     Integer apply(String readOnlyKey, String value) {
*         return readOnlyKey.split(" ").length + value.split(" ").length;
*     }
* });
* }
* Note that the key is read-only and should not be modified, as this can lead to corrupt partitioning.
* So, setting a new value preserves data co-location with respect to the key.
* Thus, no internal data redistribution is required if a key based operator (like an aggregation or join)
* is applied to the result {@code KStream}. (cf. {@link #map(KeyValueMapper)})
*
* @param mapper a {@link ValueMapperWithKey} that computes a new output value
* @param <VR> the value type of the result stream
* @return a {@code KStream} that contains records with unmodified key and new values (possibly of different type)
* @see #selectKey(KeyValueMapper)
* @see #map(KeyValueMapper)
* @see #flatMap(KeyValueMapper)
* @see #flatMapValues(ValueMapper)
* @see #flatMapValues(ValueMapperWithKey)
* @see #transform(TransformerSupplier, String...)
* @see #transformValues(ValueTransformerSupplier, String...)
* @see #transformValues(ValueTransformerWithKeySupplier, String...)
*/
<VR> KStream<K, VR> mapValues(final ValueMapperWithKey<? super K, ? super V, ? extends VR> mapper);
/**
* Transform the value of each input record into a new value (with possible new type) of the output record.
* The provided {@link ValueMapperWithKey} is applied to each input record value and computes a new value for it.
* Thus, an input record {@code <K,V>} can be transformed into an output record {@code <K:V'>}.
* This is a stateless record-by-record operation (cf.
* {@link #transformValues(ValueTransformerWithKeySupplier, String...)} for stateful value transformation).
*
* The example below counts the number of tokens of key and value strings.
*
* {@code
* KStream<String, String> inputStream = builder.stream("topic");
* KStream<String, Integer> outputStream = inputStream.mapValues(new ValueMapperWithKey<String, String, Integer>() {
*     Integer apply(String readOnlyKey, String value) {
*         return readOnlyKey.split(" ").length + value.split(" ").length;
*     }
* });
* }
* Note that the key is read-only and should not be modified, as this can lead to corrupt partitioning.
* So, setting a new value preserves data co-location with respect to the key.
* Thus, no internal data redistribution is required if a key based operator (like an aggregation or join)
* is applied to the result {@code KStream}. (cf. {@link #map(KeyValueMapper)})
*
* @param mapper a {@link ValueMapperWithKey} that computes a new output value
* @param named a {@link Named} config used to name the processor in the topology
* @param <VR> the value type of the result stream
* @return a {@code KStream} that contains records with unmodified key and new values (possibly of different type)
* @see #selectKey(KeyValueMapper)
* @see #map(KeyValueMapper)
* @see #flatMap(KeyValueMapper)
* @see #flatMapValues(ValueMapper)
* @see #flatMapValues(ValueMapperWithKey)
* @see #transform(TransformerSupplier, String...)
* @see #transformValues(ValueTransformerSupplier, String...)
* @see #transformValues(ValueTransformerWithKeySupplier, String...)
*/
<VR> KStream<K, VR> mapValues(final ValueMapperWithKey<? super K, ? super V, ? extends VR> mapper,
                              final Named named);
/**
* Transform each record of the input stream into zero or more records in the output stream (both key and value type
* can be altered arbitrarily).
* The provided {@link KeyValueMapper} is applied to each input record and computes zero or more output records.
* Thus, an input record {@code <K,V>} can be transformed into output records {@code <K':V'>, <K'':V''>, ...}.
* This is a stateless record-by-record operation (cf. {@link #transform(TransformerSupplier, String...)} for
* stateful record transformation).
*
* The example below splits input records {@code <null:String>} containing sentences as values into their words
* and emits a record {@code <word:1>} for each word.
* {@code
* KStream<byte[], String> inputStream = builder.stream("topic");
* KStream<String, Integer> outputStream = inputStream.flatMap(
*     new KeyValueMapper<byte[], String, Iterable<KeyValue<String, Integer>>>() {
*         Iterable<KeyValue<String, Integer>> apply(byte[] key, String value) {
*             String[] tokens = value.split(" ");
*             List<KeyValue<String, Integer>> result = new ArrayList<>(tokens.length);
*
*             for(String token : tokens) {
*                 result.add(new KeyValue<>(token, 1));
*             }
*
*             return result;
*         }
*     });
* }
* The provided {@link KeyValueMapper} must return an {@link Iterable} (e.g., any {@link java.util.Collection} type)
* and the return value must not be {@code null}.
*
* Flat-mapping records might result in an internal data redistribution if a key based operator (like an aggregation
* or join) is applied to the result {@code KStream}. (cf. {@link #flatMapValues(ValueMapper)})
*
* @param mapper a {@link KeyValueMapper} that computes the new output records
* @param <KR> the key type of the result stream
* @param <VR> the value type of the result stream
* @return a {@code KStream} that contains more or less records with new key and value (possibly of different type)
* @see #selectKey(KeyValueMapper)
* @see #map(KeyValueMapper)
* @see #mapValues(ValueMapper)
* @see #mapValues(ValueMapperWithKey)
* @see #flatMapValues(ValueMapper)
* @see #flatMapValues(ValueMapperWithKey)
* @see #transform(TransformerSupplier, String...)
* @see #flatTransform(TransformerSupplier, String...)
* @see #transformValues(ValueTransformerSupplier, String...)
* @see #transformValues(ValueTransformerWithKeySupplier, String...)
* @see #flatTransformValues(ValueTransformerSupplier, String...)
* @see #flatTransformValues(ValueTransformerWithKeySupplier, String...)
*/
<KR, VR> KStream<KR, VR> flatMap(final KeyValueMapper<? super K, ? super V, ? extends Iterable<? extends KeyValue<? extends KR, ? extends VR>>> mapper);
/**
* Transform each record of the input stream into zero or more records in the output stream (both key and value type
* can be altered arbitrarily).
* The provided {@link KeyValueMapper} is applied to each input record and computes zero or more output records.
* Thus, an input record {@code <K,V>} can be transformed into output records {@code <K':V'>, <K'':V''>, ...}.
* This is a stateless record-by-record operation (cf. {@link #transform(TransformerSupplier, String...)} for
* stateful record transformation).
*
* The example below splits input records {@code <null:String>} containing sentences as values into their words
* and emits a record {@code <word:1>} for each word.
* {@code
* KStream<byte[], String> inputStream = builder.stream("topic");
* KStream<String, Integer> outputStream = inputStream.flatMap(
*     new KeyValueMapper<byte[], String, Iterable<KeyValue<String, Integer>>>() {
*         Iterable<KeyValue<String, Integer>> apply(byte[] key, String value) {
*             String[] tokens = value.split(" ");
*             List<KeyValue<String, Integer>> result = new ArrayList<>(tokens.length);
*
*             for(String token : tokens) {
*                 result.add(new KeyValue<>(token, 1));
*             }
*
*             return result;
*         }
*     });
* }
* The provided {@link KeyValueMapper} must return an {@link Iterable} (e.g., any {@link java.util.Collection} type)
* and the return value must not be {@code null}.
*
* Flat-mapping records might result in an internal data redistribution if a key based operator (like an aggregation
* or join) is applied to the result {@code KStream}. (cf. {@link #flatMapValues(ValueMapper)})
*
* @param mapper a {@link KeyValueMapper} that computes the new output records
* @param named a {@link Named} config used to name the processor in the topology
* @param <KR> the key type of the result stream
* @param <VR> the value type of the result stream
* @return a {@code KStream} that contains more or less records with new key and value (possibly of different type)
* @see #selectKey(KeyValueMapper)
* @see #map(KeyValueMapper)
* @see #mapValues(ValueMapper)
* @see #mapValues(ValueMapperWithKey)
* @see #flatMapValues(ValueMapper)
* @see #flatMapValues(ValueMapperWithKey)
* @see #transform(TransformerSupplier, String...)
* @see #flatTransform(TransformerSupplier, String...)
* @see #transformValues(ValueTransformerSupplier, String...)
* @see #transformValues(ValueTransformerWithKeySupplier, String...)
* @see #flatTransformValues(ValueTransformerSupplier, String...)
* @see #flatTransformValues(ValueTransformerWithKeySupplier, String...)
*/
<KR, VR> KStream<KR, VR> flatMap(final KeyValueMapper<? super K, ? super V, ? extends Iterable<? extends KeyValue<? extends KR, ? extends VR>>> mapper,
                                 final Named named);
/**
* Create a new {@code KStream} by transforming the value of each record in this stream into zero or more values
* with the same key in the new stream.
* Transform the value of each input record into zero or more records with the same (unmodified) key in the output
* stream (value type can be altered arbitrarily).
* The provided {@link ValueMapper} is applied to each input record and computes zero or more output values.
* Thus, an input record {@code <K,V>} can be transformed into output records {@code <K:V'>, <K:V''>, ...}.
* This is a stateless record-by-record operation (cf. {@link #transformValues(ValueTransformerSupplier, String...)}
* for stateful value transformation).
*
* The example below splits input records {@code <null:String>} containing sentences as values into their words.
* {@code
* KStream<byte[], String> inputStream = builder.stream("topic");
* KStream<byte[], String> outputStream = inputStream.flatMapValues(new ValueMapper<String, Iterable<String>>() {
*     Iterable<String> apply(String value) {
*         return Arrays.asList(value.split(" "));
*     }
* });
* }
* The provided {@link ValueMapper} must return an {@link Iterable} (e.g., any {@link java.util.Collection} type)
* and the return value must not be {@code null}.
*
* Splitting a record into multiple records with the same key preserves data co-location with respect to the key.
* Thus, no internal data redistribution is required if a key based operator (like an aggregation or join)
* is applied to the result {@code KStream}. (cf. {@link #flatMap(KeyValueMapper)})
*
* @param mapper a {@link ValueMapper} that computes the new output values
* @param <VR> the value type of the result stream
* @return a {@code KStream} that contains more or less records with unmodified keys and new values of different type
* @see #selectKey(KeyValueMapper)
* @see #map(KeyValueMapper)
* @see #flatMap(KeyValueMapper)
* @see #mapValues(ValueMapper)
* @see #mapValues(ValueMapperWithKey)
* @see #transform(TransformerSupplier, String...)
* @see #flatTransform(TransformerSupplier, String...)
* @see #transformValues(ValueTransformerSupplier, String...)
* @see #transformValues(ValueTransformerWithKeySupplier, String...)
* @see #flatTransformValues(ValueTransformerSupplier, String...)
* @see #flatTransformValues(ValueTransformerWithKeySupplier, String...)
*/
<VR> KStream<K, VR> flatMapValues(final ValueMapper<? super V, ? extends Iterable<? extends VR>> mapper);
/**
* Create a new {@code KStream} by transforming the value of each record in this stream into zero or more values
* with the same key in the new stream.
* Transform the value of each input record into zero or more records with the same (unmodified) key in the output
* stream (value type can be altered arbitrarily).
* The provided {@link ValueMapper} is applied to each input record and computes zero or more output values.
* Thus, an input record {@code <K,V>} can be transformed into output records {@code <K:V'>, <K:V''>, ...}.
* This is a stateless record-by-record operation (cf. {@link #transformValues(ValueTransformerSupplier, String...)}
* for stateful value transformation).
*
* The example below splits input records {@code <null:String>} containing sentences as values into their words.
* {@code
* KStream<byte[], String> inputStream = builder.stream("topic");
* KStream<byte[], String> outputStream = inputStream.flatMapValues(new ValueMapper<String, Iterable<String>>() {
*     Iterable<String> apply(String value) {
*         return Arrays.asList(value.split(" "));
*     }
* });
* }
* The provided {@link ValueMapper} must return an {@link Iterable} (e.g., any {@link java.util.Collection} type)
* and the return value must not be {@code null}.
*
* Splitting a record into multiple records with the same key preserves data co-location with respect to the key.
* Thus, no internal data redistribution is required if a key based operator (like an aggregation or join)
* is applied to the result {@code KStream}. (cf. {@link #flatMap(KeyValueMapper)})
*
* @param mapper a {@link ValueMapper} that computes the new output values
* @param named a {@link Named} config used to name the processor in the topology
* @param <VR> the value type of the result stream
* @return a {@code KStream} that contains more or less records with unmodified keys and new values of different type
* @see #selectKey(KeyValueMapper)
* @see #map(KeyValueMapper)
* @see #flatMap(KeyValueMapper)
* @see #mapValues(ValueMapper)
* @see #mapValues(ValueMapperWithKey)
* @see #transform(TransformerSupplier, String...)
* @see #flatTransform(TransformerSupplier, String...)
* @see #transformValues(ValueTransformerSupplier, String...)
* @see #transformValues(ValueTransformerWithKeySupplier, String...)
* @see #flatTransformValues(ValueTransformerSupplier, String...)
* @see #flatTransformValues(ValueTransformerWithKeySupplier, String...)
*/
<VR> KStream<K, VR> flatMapValues(final ValueMapper<? super V, ? extends Iterable<? extends VR>> mapper,
                                  final Named named);
/**
* Create a new {@code KStream} by transforming the value of each record in this stream into zero or more values
* with the same key in the new stream.
* Transform the value of each input record into zero or more records with the same (unmodified) key in the output
* stream (value type can be altered arbitrarily).
* The provided {@link ValueMapperWithKey} is applied to each input record and computes zero or more output values.
* Thus, an input record {@code <K,V>} can be transformed into output records {@code <K:V'>, <K:V''>, ...}.
* This is a stateless record-by-record operation (cf. {@link #transformValues(ValueTransformerWithKeySupplier, String...)}
* for stateful value transformation).
*
* The example below splits input records {@code <Integer:String>}, with key=1, containing sentences as values
* into their words.
* {@code
* KStream<Integer, String> inputStream = builder.stream("topic");
* KStream<Integer, String> outputStream = inputStream.flatMapValues(new ValueMapperWithKey<Integer, String, Iterable<String>>() {
*     Iterable<String> apply(Integer readOnlyKey, String value) {
*         if(readOnlyKey == 1) {
*             return Arrays.asList(value.split(" "));
*         } else {
*             return Arrays.asList(value);
*         }
*     }
* });
* }
* The provided {@link ValueMapperWithKey} must return an {@link Iterable} (e.g., any {@link java.util.Collection} type)
* and the return value must not be {@code null}.
*
* Note that the key is read-only and should not be modified, as this can lead to corrupt partitioning.
* So, splitting a record into multiple records with the same key preserves data co-location with respect to the key.
* Thus, no internal data redistribution is required if a key based operator (like an aggregation or join)
* is applied to the result {@code KStream}. (cf. {@link #flatMap(KeyValueMapper)})
*
* @param mapper a {@link ValueMapperWithKey} that computes the new output values
* @param <VR> the value type of the result stream
* @return a {@code KStream} that contains more or less records with unmodified keys and new values of different type
* @see #selectKey(KeyValueMapper)
* @see #map(KeyValueMapper)
* @see #flatMap(KeyValueMapper)
* @see #mapValues(ValueMapper)
* @see #mapValues(ValueMapperWithKey)
* @see #transform(TransformerSupplier, String...)
* @see #flatTransform(TransformerSupplier, String...)
* @see #transformValues(ValueTransformerSupplier, String...)
* @see #transformValues(ValueTransformerWithKeySupplier, String...)
* @see #flatTransformValues(ValueTransformerSupplier, String...)
* @see #flatTransformValues(ValueTransformerWithKeySupplier, String...)
*/
<VR> KStream<K, VR> flatMapValues(final ValueMapperWithKey<? super K, ? super V, ? extends Iterable<? extends VR>> mapper);
/**
* Create a new {@code KStream} by transforming the value of each record in this stream into zero or more values
* with the same key in the new stream.
* Transform the value of each input record into zero or more records with the same (unmodified) key in the output
* stream (value type can be altered arbitrarily).
* The provided {@link ValueMapperWithKey} is applied to each input record and computes zero or more output values.
* Thus, an input record {@code <K,V>} can be transformed into output records {@code <K:V'>, <K:V''>, ...}.
* This is a stateless record-by-record operation (cf. {@link #transformValues(ValueTransformerWithKeySupplier, String...)}
* for stateful value transformation).
*
* The example below splits input records {@code <Integer:String>}, with key=1, containing sentences as values
* into their words.
* {@code
* KStream<Integer, String> inputStream = builder.stream("topic");
* KStream<Integer, String> outputStream = inputStream.flatMapValues(new ValueMapperWithKey<Integer, String, Iterable<String>>() {
*     Iterable<String> apply(Integer readOnlyKey, String value) {
*         if(readOnlyKey == 1) {
*             return Arrays.asList(value.split(" "));
*         } else {
*             return Arrays.asList(value);
*         }
*     }
* });
* }
* The provided {@link ValueMapperWithKey} must return an {@link Iterable} (e.g., any {@link java.util.Collection} type)
* and the return value must not be {@code null}.
*
* Note that the key is read-only and should not be modified, as this can lead to corrupt partitioning.
* So, splitting a record into multiple records with the same key preserves data co-location with respect to the key.
* Thus, no internal data redistribution is required if a key based operator (like an aggregation or join)
* is applied to the result {@code KStream}. (cf. {@link #flatMap(KeyValueMapper)})
*
* @param mapper a {@link ValueMapperWithKey} that computes the new output values
* @param named a {@link Named} config used to name the processor in the topology
* @param <VR> the value type of the result stream
* @return a {@code KStream} that contains more or less records with unmodified keys and new values of different type
* @see #selectKey(KeyValueMapper)
* @see #map(KeyValueMapper)
* @see #flatMap(KeyValueMapper)
* @see #mapValues(ValueMapper)
* @see #mapValues(ValueMapperWithKey)
* @see #transform(TransformerSupplier, String...)
* @see #flatTransform(TransformerSupplier, String...)
* @see #transformValues(ValueTransformerSupplier, String...)
* @see #transformValues(ValueTransformerWithKeySupplier, String...)
* @see #flatTransformValues(ValueTransformerSupplier, String...)
* @see #flatTransformValues(ValueTransformerWithKeySupplier, String...)
*/
<VR> KStream<K, VR> flatMapValues(final ValueMapperWithKey<? super K, ? super V, ? extends Iterable<? extends VR>> mapper,
                                  final Named named);
/**
* Print the records of this {@code KStream} using the options provided by {@link Printed}.
* Note that this is mainly for debugging/testing purposes, and it will try to flush on each record print.
* It SHOULD NOT be used for production usage if performance requirements are concerned.
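*
* For example, a minimal sketch (assuming a String/Long stream; the label is illustrative) that prints each record to standard output:
* {@code
* stream.print(Printed.<String, Long>toSysOut().withLabel("debug"));
* }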
*
* @param printed options for printing
*/
void print(final Printed<K, V> printed);
/**
* Perform an action on each record of {@code KStream}.
* This is a stateless record-by-record operation (cf. {@link #process(ProcessorSupplier, String...)}).
* Note that this is a terminal operation that returns void.
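*
* For example, a minimal sketch (assuming a String/Long stream) that prints each record as a terminal operation:
* {@code
* stream.foreach((key, value) -> System.out.println(key + " => " + value));
* }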
*
* @param action an action to perform on each record
* @see #process(ProcessorSupplier, String...)
*/
void foreach(final ForeachAction<? super K, ? super V> action);
/**
* Perform an action on each record of {@code KStream}.
* This is a stateless record-by-record operation (cf. {@link #process(ProcessorSupplier, String...)}).
* Note that this is a terminal operation that returns void.
*
* @param action an action to perform on each record
* @param named a {@link Named} config used to name the processor in the topology
* @see #process(ProcessorSupplier, String...)
*/
void foreach(final ForeachAction<? super K, ? super V> action, final Named named);
/**
* Perform an action on each record of {@code KStream}.
* This is a stateless record-by-record operation (cf. {@link #process(ProcessorSupplier, String...)}).
*
* Peek is a non-terminal operation that triggers a side effect (such as logging or statistics collection)
* and returns an unchanged stream.
*
* Note that since this operation is stateless, it may execute multiple times for a single record in failure cases.
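*
* For example, a minimal sketch (assuming a String/Long stream) that logs each record while leaving the stream unchanged for further processing:
* {@code
* KStream<String, Long> logged = stream.peek((key, value) -> System.out.println("seen: " + key + " => " + value));
* }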
*
* @param action an action to perform on each record
* @see #process(ProcessorSupplier, String...)
* @return itself
*/
KStream<K, V> peek(final ForeachAction<? super K, ? super V> action);
/**
* Perform an action on each record of {@code KStream}.
* This is a stateless record-by-record operation (cf. {@link #process(ProcessorSupplier, String...)}).
*
* Peek is a non-terminal operation that triggers a side effect (such as logging or statistics collection)
* and returns an unchanged stream.
*
* Note that since this operation is stateless, it may execute multiple times for a single record in failure cases.
*
* @param action an action to perform on each record
* @param named a {@link Named} config used to name the processor in the topology
* @see #process(ProcessorSupplier, String...)
* @return itself
*/
KStream<K, V> peek(final ForeachAction<? super K, ? super V> action, final Named named);
/**
* Creates an array of {@code KStream} from this stream by branching the records in the original stream based on
* the supplied predicates.
* Each record is evaluated against the supplied predicates, and predicates are evaluated in order.
* Each stream in the result array corresponds position-wise (index) to the predicate in the supplied predicates.
* The branching happens on first-match: A record in the original stream is assigned to the corresponding result
* stream for the first predicate that evaluates to true, and is assigned to this stream only.
* A record will be dropped if none of the predicates evaluate to true.
* This is a stateless record-by-record operation.
*
* @param predicates the ordered list of {@link Predicate} instances
* @return multiple distinct substreams of this {@code KStream}
* @deprecated since 2.8. Use {@link #split()} instead.
*/
@Deprecated
@SuppressWarnings("unchecked")
KStream<K, V>[] branch(final Predicate<? super K, ? super V>... predicates);
/**
* Creates an array of {@code KStream} from this stream by branching the records in the original stream based on
* the supplied predicates.
* Each record is evaluated against the supplied predicates, and predicates are evaluated in order.
* Each stream in the result array corresponds position-wise (index) to the predicate in the supplied predicates.
* The branching happens on first-match: A record in the original stream is assigned to the corresponding result
* stream for the first predicate that evaluates to true, and is assigned to this stream only.
* A record will be dropped if none of the predicates evaluate to true.
* This is a stateless record-by-record operation.
*
* @param named a {@link Named} config used to name the processor in the topology
* @param predicates the ordered list of {@link Predicate} instances
* @return multiple distinct substreams of this {@code KStream}
* @deprecated since 2.8. Use {@link #split(Named)} instead.
*/
@Deprecated
@SuppressWarnings("unchecked")
KStream<K, V>[] branch(final Named named, final Predicate<? super K, ? super V>... predicates);
/**
* Split this stream into different branches. The returned {@link BranchedKStream} instance can be used for routing
* the records to different branches depending on evaluation against the supplied predicates.
*
* Note: Stream branching is a stateless record-by-record operation.
* Please check {@link BranchedKStream} for a detailed description and usage example.
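*
* For example, a minimal sketch (assuming a String/Long stream; the predicate is illustrative) that routes records by key prefix:
* {@code
* Map<String, KStream<String, Long>> branches = stream.split()
*     .branch((key, value) -> key.startsWith("A"))
*     .defaultBranch();
* }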
*
* @return {@link BranchedKStream} that provides methods for routing the records to different branches.
*/
BranchedKStream<K, V> split();
/**
* Split this stream into different branches. The returned {@link BranchedKStream} instance can be used for routing
* the records to different branches depending on evaluation against the supplied predicates.
*
* Note: Stream branching is a stateless record-by-record operation.
* Please check {@link BranchedKStream} for a detailed description and usage example.
*
* @param named a {@link Named} config used to name the processor in the topology and also to set the name prefix
* for the resulting branches (see {@link BranchedKStream})
* @return {@link BranchedKStream} that provides methods for routing the records to different branches.
*/
BranchedKStream<K, V> split(final Named named);
/**
* Merge this stream and the given stream into one larger stream.
*
* There is no ordering guarantee between records from this {@code KStream} and records from
* the provided {@code KStream} in the merged stream.
* Relative order is preserved within each input stream though (i.e., records within one input
* stream are processed in order).
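*
* For example, a minimal sketch (stream names are illustrative) merging two streams with the same key and value types:
* {@code
* KStream<String, Long> merged = streamA.merge(streamB);
* }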
*
* @param stream a stream which is to be merged into this stream
* @return a merged stream containing all records from this and the provided {@code KStream}
*/
KStream<K, V> merge(final KStream<K, V> stream);
/**
* Merge this stream and the given stream into one larger stream.
*
* There is no ordering guarantee between records from this {@code KStream} and records from
* the provided {@code KStream} in the merged stream.
* Relative order is preserved within each input stream though (i.e., records within one input
* stream are processed in order).
*
* @param stream a stream which is to be merged into this stream
* @param named a {@link Named} config used to name the processor in the topology
* @return a merged stream containing all records from this and the provided {@code KStream}
*/
KStream<K, V> merge(final KStream<K, V> stream, final Named named);
/**
* Materialize this stream to a topic and create a new {@code KStream} from the topic using default serializers,
* deserializers, and producer's {@link DefaultPartitioner}.
* The specified topic should be manually created before it is used (i.e., before the Kafka Streams application is
* started).
*
* This is similar to calling {@link #to(String) #to(someTopicName)} and
* {@link StreamsBuilder#stream(String) StreamsBuilder#stream(someTopicName)}.
* Note that {@code through()} uses a hard coded {@link org.apache.kafka.streams.processor.FailOnInvalidTimestamp
* timestamp extractor} and does not allow to customize it, to ensure correct timestamp propagation.
*
* @param topic the topic name
* @return a {@code KStream} that contains the exact same (and potentially repartitioned) records as this {@code KStream}
* @deprecated since 2.6; use {@link #repartition()} instead
*/
// TODO: when removed, update `StreamsResetter` description of --intermediate-topics
@Deprecated
KStream<K, V> through(final String topic);
/**
* Materialize this stream to a topic and create a new {@code KStream} from the topic using the
* {@link Produced} instance for configuration of the {@link Serde key serde}, {@link Serde value serde},
* and {@link StreamPartitioner}.
* The specified topic should be manually created before it is used (i.e., before the Kafka Streams application is
* started).
*
* This is similar to calling {@link #to(String, Produced) to(someTopic, Produced.with(keySerde, valueSerde))}
* and {@link StreamsBuilder#stream(String, Consumed) StreamsBuilder#stream(someTopicName, Consumed.with(keySerde, valueSerde))}.
* Note that {@code through()} uses a hard coded {@link org.apache.kafka.streams.processor.FailOnInvalidTimestamp
* timestamp extractor} and does not allow to customize it, to ensure correct timestamp propagation.
*
* @param topic the topic name
* @param produced the options to use when producing to the topic
* @return a {@code KStream} that contains the exact same (and potentially repartitioned) records as this {@code KStream}
* @deprecated since 2.6; use {@link #repartition(Repartitioned)} instead
*/
@Deprecated
KStream<K, V> through(final String topic,
                      final Produced<K, V> produced);
/**
* Materialize this stream to an auto-generated repartition topic and create a new {@code KStream}
* from the auto-generated topic using default serializers, deserializers, and producer's {@link DefaultPartitioner}.
* The number of partitions is determined based on the upstream topics' partition numbers.
*
* The created topic is considered as an internal topic and is meant to be used only by the current Kafka Streams instance.
* Similar to auto-repartitioning, the topic will be created with infinite retention time and data will be automatically purged by Kafka Streams.
* The topic will be named as "${applicationId}-<name>-repartition", where "applicationId" is user-specified in
* {@link StreamsConfig} via parameter {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG},
* "<name>" is an internally generated name, and "-repartition" is a fixed suffix.
*
* @return {@code KStream} that contains the exact same repartitioned records as this {@code KStream}.
*/
KStream<K, V> repartition();
/**
* Materialize this stream to an auto-generated repartition topic and create a new {@code KStream}
* from the auto-generated topic using {@link Serde key serde}, {@link Serde value serde}, {@link StreamPartitioner},
* number of partitions, and topic name part as defined by {@link Repartitioned}.
*
* The created topic is considered as an internal topic and is meant to be used only by the current Kafka Streams instance.
* Similar to auto-repartitioning, the topic will be created with infinite retention time and data will be automatically purged by Kafka Streams.
* The topic will be named as "${applicationId}-<name>-repartition", where "applicationId" is user-specified in
* {@link StreamsConfig} via parameter {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG},
* "<name>" is either provided via {@link Repartitioned#as(String)} or an internally
* generated name, and "-repartition" is a fixed suffix.
*
* @param repartitioned the {@link Repartitioned} instance used to specify {@link Serdes},
* {@link StreamPartitioner} which determines how records are distributed among partitions of the topic,
* part of the topic name, and number of partitions for a repartition topic.
* @return a {@code KStream} that contains the exact same repartitioned records as this {@code KStream}.
*/
KStream<K, V> repartition(final Repartitioned<K, V> repartitioned);
/**
* Materialize this stream to a topic using default serializers specified in the config and producer's
* {@link DefaultPartitioner}.
* The specified topic should be manually created before it is used (i.e., before the Kafka Streams application is
* started).
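*
* For example, a minimal sketch (topic name is illustrative) that writes the stream to an output topic:
* {@code
* stream.to("output-topic");
* }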
*
* @param topic the topic name
*/
void to(final String topic);
/**
* Materialize this stream to a topic using the provided {@link Produced} instance.
* The specified topic should be manually created before it is used (i.e., before the Kafka Streams application is
* started).
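*
* For example, a minimal sketch (topic name and serde choices are illustrative) that supplies explicit serdes when writing:
* {@code
* stream.to("output-topic", Produced.with(Serdes.String(), Serdes.Long()));
* }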
*
* @param topic the topic name
* @param produced the options to use when producing to the topic
*/
void to(final String topic,
        final Produced<K, V> produced);
/**
* Dynamically materialize this stream to topics using default serializers specified in the config and producer's
* {@link DefaultPartitioner}.
* The topic to send each record to is dynamically determined based on the {@link TopicNameExtractor}.
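*
* For example, a minimal sketch (routing rule and topic prefix are illustrative) that derives the target topic from the record key:
* {@code
* stream.to((key, value, recordContext) -> "events-" + key);
* }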
*
* @param topicExtractor the extractor to determine the name of the Kafka topic to write to for each record
*/
void to(final TopicNameExtractor<K, V> topicExtractor);
/**
* Dynamically materialize this stream to topics using the provided {@link Produced} instance.
* The topic to send each record to is dynamically determined based on the {@link TopicNameExtractor}.
*
* @param topicExtractor the extractor to determine the name of the Kafka topic to write to for each record
* @param produced the options to use when producing to the topic
*/
void to(final TopicNameExtractor<K, V> topicExtractor,
        final Produced<K, V> produced);
/**
* Convert this stream to a {@link KTable}.
*
* If a key changing operator was used before this operation (e.g., {@link #selectKey(KeyValueMapper)},
* {@link #map(KeyValueMapper)}, {@link #flatMap(KeyValueMapper)} or
* {@link #transform(TransformerSupplier, String...)}) an internal repartitioning topic will be created in Kafka.
* This topic will be named "${applicationId}-<name>-repartition", where "applicationId" is user-specified in
* {@link StreamsConfig} via parameter {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG},
* "<name>" is an internally generated name, and "-repartition" is a fixed suffix.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* For this case, all data of this stream will be redistributed through the repartitioning topic by writing all
* records to it, and rereading all records from it, such that the resulting {@link KTable} is partitioned
* correctly on its key.
* Note that you cannot enable {@link StreamsConfig#TOPOLOGY_OPTIMIZATION_CONFIG} config for this case, because
* repartition topics are considered transient and do not allow recovering the result {@link KTable} in case of
* a failure; hence, a dedicated changelog topic is required to guarantee fault-tolerance.
*
* Note that this is a logical operation and only changes the "interpretation" of the stream, i.e., each record of
* it was a "fact/event" and is re-interpreted as update now (cf. {@link KStream} vs {@code KTable}).
*
* @return a {@link KTable} that contains the same records as this {@code KStream}
*/
KTable<K, V> toTable();
/**
* Convert this stream to a {@link KTable}.
*
* If a key changing operator was used before this operation (e.g., {@link #selectKey(KeyValueMapper)},
* {@link #map(KeyValueMapper)}, {@link #flatMap(KeyValueMapper)} or
* {@link #transform(TransformerSupplier, String...)}) an internal repartitioning topic will be created in Kafka.
* This topic will be named "${applicationId}-<name>-repartition", where "applicationId" is user-specified in
* {@link StreamsConfig} via parameter {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG},
* "<name>" is an internally generated name, and "-repartition" is a fixed suffix.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* For this case, all data of this stream will be redistributed through the repartitioning topic by writing all
* records to it, and rereading all records from it, such that the resulting {@link KTable} is partitioned
* correctly on its key.
* Note that you cannot enable {@link StreamsConfig#TOPOLOGY_OPTIMIZATION_CONFIG} config for this case, because
* repartition topics are considered transient and do not allow recovering the result {@link KTable} in case of
* a failure; hence, a dedicated changelog topic is required to guarantee fault-tolerance.
*
* Note that this is a logical operation and only changes the "interpretation" of the stream, i.e., each record of
* it was a "fact/event" and is re-interpreted as update now (cf. {@link KStream} vs {@code KTable}).
*
* @param named a {@link Named} config used to name the processor in the topology
* @return a {@link KTable} that contains the same records as this {@code KStream}
*/
KTable<K, V> toTable(final Named named);
/**
* Convert this stream to a {@link KTable}.
*
* If a key changing operator was used before this operation (e.g., {@link #selectKey(KeyValueMapper)},
* {@link #map(KeyValueMapper)}, {@link #flatMap(KeyValueMapper)} or
* {@link #transform(TransformerSupplier, String...)}) an internal repartitioning topic will be created in Kafka.
* This topic will be named "${applicationId}-<name>-repartition", where "applicationId" is user-specified in
* {@link StreamsConfig} via parameter {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG},
* "<name>" is an internally generated name, and "-repartition" is a fixed suffix.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* For this case, all data of this stream will be redistributed through the repartitioning topic by writing all
* records to it, and rereading all records from it, such that the resulting {@link KTable} is partitioned
* correctly on its key.
* Note that you cannot enable {@link StreamsConfig#TOPOLOGY_OPTIMIZATION_CONFIG} config for this case, because
* repartition topics are considered transient and do not allow recovering the result {@link KTable} in case of
* a failure; hence, a dedicated changelog topic is required to guarantee fault-tolerance.
*
* Note that this is a logical operation and only changes the "interpretation" of the stream, i.e., each record of
* it was a "fact/event" and is re-interpreted as update now (cf. {@link KStream} vs {@code KTable}).
*
* @param materialized an instance of {@link Materialized} used to describe how the state store of the
* resulting table should be materialized.
* @return a {@link KTable} that contains the same records as this {@code KStream}
*/
KTable<K, V> toTable(final Materialized<K, V, KeyValueStore<Bytes, byte[]>> materialized);
/**
* Convert this stream to a {@link KTable}.
*
* If a key changing operator was used before this operation (e.g., {@link #selectKey(KeyValueMapper)},
* {@link #map(KeyValueMapper)}, {@link #flatMap(KeyValueMapper)} or
* {@link #transform(TransformerSupplier, String...)}) an internal repartitioning topic will be created in Kafka.
* This topic will be named "${applicationId}-<name>-repartition", where "applicationId" is user-specified in
* {@link StreamsConfig} via parameter {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG},
* "<name>" is an internally generated name, and "-repartition" is a fixed suffix.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* For this case, all data of this stream will be redistributed through the repartitioning topic by writing all
* records to it, and rereading all records from it, such that the resulting {@link KTable} is partitioned
* correctly on its key.
* Note that you cannot enable {@link StreamsConfig#TOPOLOGY_OPTIMIZATION_CONFIG} config for this case, because
* repartition topics are considered transient and do not allow recovering the result {@link KTable} in case of
* a failure; hence, a dedicated changelog topic is required to guarantee fault-tolerance.
*
* Note that this is a logical operation and only changes the "interpretation" of the stream, i.e., each record of
* it was a "fact/event" and is re-interpreted as update now (cf. {@link KStream} vs {@code KTable}).
*
* @param named a {@link Named} config used to name the processor in the topology
* @param materialized an instance of {@link Materialized} used to describe how the state store of the
* resulting table should be materialized.
* @return a {@link KTable} that contains the same records as this {@code KStream}
*/
KTable<K, V> toTable(final Named named,
                     final Materialized<K, V, KeyValueStore<Bytes, byte[]>> materialized);
/**
* Group the records of this {@code KStream} on a new key that is selected using the provided {@link KeyValueMapper}
* and default serializers and deserializers.
* {@link KGroupedStream} can be further grouped with other streams to form a {@link CogroupedKStream}.
* Grouping a stream on the record key is required before an aggregation operator can be applied to the data
* (cf. {@link KGroupedStream}).
* The {@link KeyValueMapper} selects a new key (which may or may not be of the same type) while preserving the
* original values.
* If the new record key is {@code null} the record will not be included in the resulting {@link KGroupedStream}.
*
* Because a new key is selected, an internal repartitioning topic may need to be created in Kafka if a
* later operator depends on the newly selected key.
* This topic will be named "${applicationId}-<name>-repartition", where "applicationId" is user-specified in
* {@link StreamsConfig} via parameter {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG},
* "<name>" is an internally generated name, and "-repartition" is a fixed suffix.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* All data of this stream will be redistributed through the repartitioning topic by writing all records to it,
* and rereading all records from it, such that the resulting {@link KGroupedStream} is partitioned on the new key.
*
* This operation is equivalent to calling {@link #selectKey(KeyValueMapper)} followed by {@link #groupByKey()}.
* If the key type is changed, it is recommended to use {@link #groupBy(KeyValueMapper, Grouped)} instead.
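*
* For example, a minimal sketch (assuming a String/String stream; the key selector is illustrative) that groups records by the first character of the value:
* {@code
* KGroupedStream<String, String> grouped = stream.groupBy((key, value) -> value.substring(0, 1));
* }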
*
* @param keySelector a {@link KeyValueMapper} that computes a new key for grouping
* @param <KR> the key type of the result {@link KGroupedStream}
* @return a {@link KGroupedStream} that contains the grouped records of the original {@code KStream}
*/
<KR> KGroupedStream<KR, V> groupBy(final KeyValueMapper<? super K, ? super V, KR> keySelector);
/**
* Group the records of this {@code KStream} on a new key that is selected using the provided {@link KeyValueMapper}
* and {@link Serde}s as specified by {@link Grouped}.
* {@link KGroupedStream} can be further grouped with other streams to form a {@link CogroupedKStream}.
* Grouping a stream on the record key is required before an aggregation operator can be applied to the data
* (cf. {@link KGroupedStream}).
* The {@link KeyValueMapper} selects a new key (which may or may not be of the same type) while preserving the
* original values.
* If the new record key is {@code null} the record will not be included in the resulting {@link KGroupedStream}.
*
* Because a new key is selected, an internal repartitioning topic may need to be created in Kafka if a later
* operator depends on the newly selected key.
* This topic will be named "${applicationId}-<name>-repartition", where "applicationId" is user-specified in
* {@link StreamsConfig} via parameter {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG},
* "<name>" is either provided via {@link org.apache.kafka.streams.kstream.Grouped#as(String)} or an
* internally generated name.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* All data of this stream will be redistributed through the repartitioning topic by writing all records to it,
* and rereading all records from it, such that the resulting {@link KGroupedStream} is partitioned on the new key.
*
* This operation is equivalent to calling {@link #selectKey(KeyValueMapper)} followed by {@link #groupByKey()}.
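*
* For illustration only, a sketch that selects a key of a different type and passes matching serdes
* via {@link Grouped} (the topic name and bucketing logic are made up):
* <pre>{@code
* KStream<String, Long> orders = builder.stream("orders");          // key: userId, value: amount
* KGroupedStream<Long, Long> ordersByBucket = orders.groupBy(
*     (userId, amount) -> amount / 100,                             // new key type: Long
*     Grouped.with("orders-by-bucket", Serdes.Long(), Serdes.Long()));
* }</pre>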
*
* @param keySelector a {@link KeyValueMapper} that computes a new key for grouping
* @param grouped the {@link Grouped} instance used to specify {@link org.apache.kafka.common.serialization.Serdes}
* and part of the name for a repartition topic if repartitioning is required.
* @param <KR> the key type of the result {@link KGroupedStream}
* @return a {@link KGroupedStream} that contains the grouped records of the original {@code KStream}
*/
<KR> KGroupedStream<KR, V> groupBy(final KeyValueMapper<? super K, ? super V, KR> keySelector,
                                   final Grouped<KR, V> grouped);
/**
* Group the records by their current key into a {@link KGroupedStream} while preserving the original values
* and default serializers and deserializers.
* {@link KGroupedStream} can be further grouped with other streams to form a {@link CogroupedKStream}.
* Grouping a stream on the record key is required before an aggregation operator can be applied to the data
* (cf. {@link KGroupedStream}).
* If a record key is {@code null} the record will not be included in the resulting {@link KGroupedStream}.
*
* If a key changing operator was used before this operation (e.g., {@link #selectKey(KeyValueMapper)},
* {@link #map(KeyValueMapper)}, {@link #flatMap(KeyValueMapper)} or
* {@link #transform(TransformerSupplier, String...)}) an internal repartitioning topic may need to be created in
* Kafka if a later operator depends on the newly selected key.
* This topic will be named "${applicationId}-<name>-repartition", where "applicationId" is user-specified in
* {@link StreamsConfig} via parameter {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG},
* "<name>" is an internally generated name, and "-repartition" is a fixed suffix.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* For this case, all data of this stream will be redistributed through the repartitioning topic by writing all
* records to it, and rereading all records from it, such that the resulting {@link KGroupedStream} is partitioned
* correctly on its key.
* If the last key changing operator changed the key type, it is recommended to use
* {@link #groupByKey(org.apache.kafka.streams.kstream.Grouped)} instead.
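*
* For illustration only, a typical sketch that counts records per (unchanged) key, so no
* repartitioning is needed (the topic name is made up):
* <pre>{@code
* KStream<String, String> words = builder.stream("words");
* KTable<String, Long> wordCounts = words
*     .groupByKey()
*     .count();
* }</pre>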
*
* @return a {@link KGroupedStream} that contains the grouped records of the original {@code KStream}
* @see #groupBy(KeyValueMapper)
*/
KGroupedStream<K, V> groupByKey();
/**
* Group the records by their current key into a {@link KGroupedStream} while preserving the original values
* and using the serializers as defined by {@link Grouped}.
* {@link KGroupedStream} can be further grouped with other streams to form a {@link CogroupedKStream}.
* Grouping a stream on the record key is required before an aggregation operator can be applied to the data
* (cf. {@link KGroupedStream}).
* If a record key is {@code null} the record will not be included in the resulting {@link KGroupedStream}.
*
* If a key changing operator was used before this operation (e.g., {@link #selectKey(KeyValueMapper)},
* {@link #map(KeyValueMapper)}, {@link #flatMap(KeyValueMapper)} or
* {@link #transform(TransformerSupplier, String...)}) an internal repartitioning topic may need to be created in
* Kafka if a later operator depends on the newly selected key.
* This topic will be named "${applicationId}-<name>-repartition", where "applicationId" is user-specified in
* {@link StreamsConfig} via parameter {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG},
* <name> is either provided via {@link org.apache.kafka.streams.kstream.Grouped#as(String)} or an internally
* generated name, and "-repartition" is a fixed suffix.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* For this case, all data of this stream will be redistributed through the repartitioning topic by writing all
* records to it, and rereading all records from it, such that the resulting {@link KGroupedStream} is partitioned
* correctly on its key.
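*
* For illustration only, a sketch that overrides the default serdes for the grouping step
* (the topic name is made up):
* <pre>{@code
* KStream<String, Long> views = builder.stream("page-views");
* KTable<String, Long> viewCounts = views
*     .groupByKey(Grouped.with(Serdes.String(), Serdes.Long()))
*     .count();
* }</pre>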
*
* @param grouped the {@link Grouped} instance used to specify {@link Serdes}
* and part of the name for a repartition topic if repartitioning is required.
* @return a {@link KGroupedStream} that contains the grouped records of the original {@code KStream}
* @see #groupBy(KeyValueMapper)
*/
KGroupedStream<K, V> groupByKey(final Grouped<K, V> grouped);
/**
* Join records of this stream with another {@code KStream}'s records using windowed inner equi join with default
* serializers and deserializers.
* The join is computed on the records' key with join attribute {@code thisKStream.key == otherKStream.key}.
* Furthermore, two records are only joined if their timestamps are close to each other as defined by the given
* {@link JoinWindows}, i.e., the window defines an additional join predicate on the record timestamps.
*
* For each pair of records meeting both join predicates the provided {@link ValueJoiner} will be called to compute
* a value (with arbitrary type) for the result record.
* The key of the result record is the same as for both joining input records.
* If an input record key or value is {@code null} the record will not be included in the join operation and thus no
* output record will be added to the resulting {@code KStream}.
*
* Example (assuming all input records belong to the correct windows):
*
* this      | other     | result
* ----------+-----------+-------------------------
* <K1:A>    |           |
* <K2:B>    | <K2:b>    | <K2:ValueJoiner(B,b)>
*           | <K3:c>    |
*
* Both input streams (or to be more precise, their underlying source topics) need to have the same number of
* partitions.
* If this is not the case, you would need to call {@link #repartition(Repartitioned)} (for one input stream) before
* doing the join and specify the "correct" number of partitions via {@link Repartitioned} parameter.
* Furthermore, both input streams need to be co-partitioned on the join key (i.e., use the same partitioner).
* If this requirement is not met, Kafka Streams will automatically repartition the data, i.e., it will create an
* internal repartitioning topic in Kafka and write and re-read the data via this topic before the actual join.
* The repartitioning topic will be named "${applicationId}-<name>-repartition", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "<name>" is an internally generated
* name, and "-repartition" is a fixed suffix.
*
* Repartitioning can happen for one or both of the joining {@code KStream}s.
* For this case, all data of the stream will be redistributed through the repartitioning topic by writing all
* records to it, and rereading all records from it, such that the join input {@code KStream} is partitioned
* correctly on its key.
*
* Both of the joining {@code KStream}s will be materialized in local state stores with auto-generated store names.
* For failure and recovery each store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-<storename>-changelog", where "applicationId" is user-specified
* in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is an
* internally generated name, and "-changelog" is a fixed suffix.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
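*
* For illustration only, a sketch that joins clicks and impressions recorded within five minutes of
* each other (topic names are made up; String serdes and a {@code java.time.Duration} import are assumed):
* <pre>{@code
* KStream<String, String> clicks = builder.stream("ad-clicks");
* KStream<String, String> impressions = builder.stream("ad-impressions");
* KStream<String, String> matched = clicks.join(
*     impressions,
*     (click, impression) -> click + "/" + impression,
*     JoinWindows.of(Duration.ofMinutes(5)));
* }</pre>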
*
* @param otherStream the {@code KStream} to be joined with this stream
* @param joiner a {@link ValueJoiner} that computes the join result for a pair of matching records
* @param windows the specification of the {@link JoinWindows}
* @param <VO> the value type of the other stream
* @param <VR> the value type of the result stream
* @return a {@code KStream} that contains join-records for each key and values computed by the given
* {@link ValueJoiner}, one for each matched record-pair with the same key and within the joining window intervals
* @see #leftJoin(KStream, ValueJoiner, JoinWindows)
* @see #outerJoin(KStream, ValueJoiner, JoinWindows)
*/
<VO, VR> KStream<K, VR> join(final KStream<K, VO> otherStream,
                             final ValueJoiner<? super V, ? super VO, ? extends VR> joiner,
                             final JoinWindows windows);
/**
* Join records of this stream with another {@code KStream}'s records using windowed inner equi join with default
* serializers and deserializers.
* The join is computed on the records' key with join attribute {@code thisKStream.key == otherKStream.key}.
* Furthermore, two records are only joined if their timestamps are close to each other as defined by the given
* {@link JoinWindows}, i.e., the window defines an additional join predicate on the record timestamps.
*
* For each pair of records meeting both join predicates the provided {@link ValueJoinerWithKey} will be called to compute
* a value (with arbitrary type) for the result record.
* Note that the key is read-only and should not be modified, as this can lead to undefined behaviour.
* The key of the result record is the same as for both joining input records.
* If an input record key or value is {@code null} the record will not be included in the join operation and thus no
* output record will be added to the resulting {@code KStream}.
*
* Example (assuming all input records belong to the correct windows):
*
* this      | other     | result
* ----------+-----------+---------------------------------
* <K1:A>    |           |
* <K2:B>    | <K2:b>    | <K2:ValueJoinerWithKey(K2,B,b)>
*           | <K3:c>    |
*
* Both input streams (or to be more precise, their underlying source topics) need to have the same number of
* partitions.
* If this is not the case, you would need to call {@link #repartition(Repartitioned)} (for one input stream) before
* doing the join and specify the "correct" number of partitions via {@link Repartitioned} parameter.
* Furthermore, both input streams need to be co-partitioned on the join key (i.e., use the same partitioner).
* If this requirement is not met, Kafka Streams will automatically repartition the data, i.e., it will create an
* internal repartitioning topic in Kafka and write and re-read the data via this topic before the actual join.
* The repartitioning topic will be named "${applicationId}-<name>-repartition", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "<name>" is an internally generated
* name, and "-repartition" is a fixed suffix.
*
* Repartitioning can happen for one or both of the joining {@code KStream}s.
* For this case, all data of the stream will be redistributed through the repartitioning topic by writing all
* records to it, and rereading all records from it, such that the join input {@code KStream} is partitioned
* correctly on its key.
*
* Both of the joining {@code KStream}s will be materialized in local state stores with auto-generated store names.
* For failure and recovery each store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-<storename>-changelog", where "applicationId" is user-specified
* in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is an
* internally generated name, and "-changelog" is a fixed suffix.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
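*
* For illustration only, the same kind of windowed join but using a {@link ValueJoinerWithKey} so the
* (read-only) key can be part of the result ({@code clicks} and {@code impressions} are hypothetical
* {@code KStream<String, String>} instances):
* <pre>{@code
* KStream<String, String> matched = clicks.join(
*     impressions,
*     (adId, click, impression) -> adId + ":" + click + "/" + impression,
*     JoinWindows.of(Duration.ofMinutes(5)));
* }</pre>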
*
* @param otherStream the {@code KStream} to be joined with this stream
* @param joiner a {@link ValueJoinerWithKey} that computes the join result for a pair of matching records
* @param windows the specification of the {@link JoinWindows}
* @param <VO> the value type of the other stream
* @param <VR> the value type of the result stream
* @return a {@code KStream} that contains join-records for each key and values computed by the given
* {@link ValueJoinerWithKey}, one for each matched record-pair with the same key and within the joining window intervals
* @see #leftJoin(KStream, ValueJoinerWithKey, JoinWindows)
* @see #outerJoin(KStream, ValueJoinerWithKey, JoinWindows)
*/
<VO, VR> KStream<K, VR> join(final KStream<K, VO> otherStream,
                             final ValueJoinerWithKey<? super K, ? super V, ? super VO, ? extends VR> joiner,
                             final JoinWindows windows);
/**
* Join records of this stream with another {@code KStream}'s records using windowed inner equi join using the
* {@link StreamJoined} instance for configuration of the {@link Serde key serde}, {@link Serde this stream's value
* serde}, {@link Serde the other stream's value serde}, and used state stores.
* The join is computed on the records' key with join attribute {@code thisKStream.key == otherKStream.key}.
* Furthermore, two records are only joined if their timestamps are close to each other as defined by the given
* {@link JoinWindows}, i.e., the window defines an additional join predicate on the record timestamps.
*
* For each pair of records meeting both join predicates the provided {@link ValueJoiner} will be called to compute
* a value (with arbitrary type) for the result record.
* The key of the result record is the same as for both joining input records.
* If an input record key or value is {@code null} the record will not be included in the join operation and thus no
* output record will be added to the resulting {@code KStream}.
*
* Example (assuming all input records belong to the correct windows):
*
* this      | other     | result
* ----------+-----------+-------------------------
* <K1:A>    |           |
* <K2:B>    | <K2:b>    | <K2:ValueJoiner(B,b)>
*           | <K3:c>    |
*
* Both input streams (or to be more precise, their underlying source topics) need to have the same number of
* partitions.
* If this is not the case, you would need to call {@link #repartition(Repartitioned)} (for one input stream) before
* doing the join and specify the "correct" number of partitions via {@link Repartitioned} parameter.
* Furthermore, both input streams need to be co-partitioned on the join key (i.e., use the same partitioner).
* If this requirement is not met, Kafka Streams will automatically repartition the data, i.e., it will create an
* internal repartitioning topic in Kafka and write and re-read the data via this topic before the actual join.
* The repartitioning topic will be named "${applicationId}-<name>-repartition", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "<name>" is an internally generated
* name, and "-repartition" is a fixed suffix.
*
* Repartitioning can happen for one or both of the joining {@code KStream}s.
* For this case, all data of the stream will be redistributed through the repartitioning topic by writing all
* records to it, and rereading all records from it, such that the join input {@code KStream} is partitioned
* correctly on its key.
*
* Both of the joining {@code KStream}s will be materialized in local state stores with auto-generated store names,
* unless a name is provided via a {@code StreamJoined} instance.
* For failure and recovery each store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-<storename>-changelog", where "applicationId" is user-specified
* in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is an
* internally generated name, and "-changelog" is a fixed suffix.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
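*
* For illustration only, a sketch that supplies the join serdes and a base store name via
* {@link StreamJoined} ({@code clicks} and {@code impressions} are hypothetical streams):
* <pre>{@code
* KStream<String, String> matched = clicks.join(
*     impressions,
*     (click, impression) -> click + "/" + impression,
*     JoinWindows.of(Duration.ofMinutes(5)),
*     StreamJoined.with(Serdes.String(), Serdes.String(), Serdes.String())
*         .withStoreName("clicks-impressions-join"));
* }</pre>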
*
* @param <VO> the value type of the other stream
* @param <VR> the value type of the result stream
* @param otherStream the {@code KStream} to be joined with this stream
* @param joiner a {@link ValueJoiner} that computes the join result for a pair of matching records
* @param windows the specification of the {@link JoinWindows}
* @param streamJoined a {@link StreamJoined} used to configure join stores
* @return a {@code KStream} that contains join-records for each key and values computed by the given
* {@link ValueJoiner}, one for each matched record-pair with the same key and within the joining window intervals
* @see #leftJoin(KStream, ValueJoiner, JoinWindows, StreamJoined)
* @see #outerJoin(KStream, ValueJoiner, JoinWindows, StreamJoined)
*/
<VO, VR> KStream<K, VR> join(final KStream<K, VO> otherStream,
                             final ValueJoiner<? super V, ? super VO, ? extends VR> joiner,
                             final JoinWindows windows,
                             final StreamJoined<K, V, VO> streamJoined);
/**
* Join records of this stream with another {@code KStream}'s records using windowed inner equi join using the
* {@link StreamJoined} instance for configuration of the {@link Serde key serde}, {@link Serde this stream's value
* serde}, {@link Serde the other stream's value serde}, and used state stores.
* The join is computed on the records' key with join attribute {@code thisKStream.key == otherKStream.key}.
* Furthermore, two records are only joined if their timestamps are close to each other as defined by the given
* {@link JoinWindows}, i.e., the window defines an additional join predicate on the record timestamps.
*
* For each pair of records meeting both join predicates the provided {@link ValueJoinerWithKey} will be called to compute
* a value (with arbitrary type) for the result record.
* Note that the key is read-only and should not be modified, as this can lead to undefined behaviour.
* The key of the result record is the same as for both joining input records.
* If an input record key or value is {@code null} the record will not be included in the join operation and thus no
* output record will be added to the resulting {@code KStream}.
*
* Example (assuming all input records belong to the correct windows):
*
* this      | other     | result
* ----------+-----------+---------------------------------
* <K1:A>    |           |
* <K2:B>    | <K2:b>    | <K2:ValueJoinerWithKey(K2,B,b)>
*           | <K3:c>    |
*
* Both input streams (or to be more precise, their underlying source topics) need to have the same number of
* partitions.
* If this is not the case, you would need to call {@link #repartition(Repartitioned)} (for one input stream) before
* doing the join and specify the "correct" number of partitions via {@link Repartitioned} parameter.
* Furthermore, both input streams need to be co-partitioned on the join key (i.e., use the same partitioner).
* If this requirement is not met, Kafka Streams will automatically repartition the data, i.e., it will create an
* internal repartitioning topic in Kafka and write and re-read the data via this topic before the actual join.
* The repartitioning topic will be named "${applicationId}-<name>-repartition", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "<name>" is an internally generated
* name, and "-repartition" is a fixed suffix.
*
* Repartitioning can happen for one or both of the joining {@code KStream}s.
* For this case, all data of the stream will be redistributed through the repartitioning topic by writing all
* records to it, and rereading all records from it, such that the join input {@code KStream} is partitioned
* correctly on its key.
*
* Both of the joining {@code KStream}s will be materialized in local state stores with auto-generated store names,
* unless a name is provided via a {@code StreamJoined} instance.
* For failure and recovery each store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-<storename>-changelog", where "applicationId" is user-specified
* in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "storeName" is an
* internally generated name, and "-changelog" is a fixed suffix.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param <VO> the value type of the other stream
* @param <VR> the value type of the result stream
* @param otherStream the {@code KStream} to be joined with this stream
* @param joiner a {@link ValueJoinerWithKey} that computes the join result for a pair of matching records
* @param windows the specification of the {@link JoinWindows}
* @param streamJoined a {@link StreamJoined} used to configure join stores
* @return a {@code KStream} that contains join-records for each key and values computed by the given
* {@link ValueJoinerWithKey}, one for each matched record-pair with the same key and within the joining window intervals
* @see #leftJoin(KStream, ValueJoinerWithKey, JoinWindows, StreamJoined)
* @see #outerJoin(KStream, ValueJoinerWithKey, JoinWindows, StreamJoined)
*/
<VO, VR> KStream<K, VR> join(final KStream<K, VO> otherStream,
                             final ValueJoinerWithKey<? super K, ? super V, ? super VO, ? extends VR> joiner,
                             final JoinWindows windows,
                             final StreamJoined<K, V, VO> streamJoined);
/**
* Join records of this stream with another {@code KStream}'s records using windowed left equi join with default
* serializers and deserializers.
* In contrast to {@link #join(KStream, ValueJoiner, JoinWindows) inner-join}, all records from this stream will
* produce at least one output record (cf. below).
* The join is computed on the records' key with join attribute {@code thisKStream.key == otherKStream.key}.
* Furthermore, two records are only joined if their timestamps are close to each other as defined by the given
* {@link JoinWindows}, i.e., the window defines an additional join predicate on the record timestamps.
*
* For each pair of records meeting both join predicates the provided {@link ValueJoiner} will be called to compute
* a value (with arbitrary type) for the result record.
* The key of the result record is the same as for both joining input records.
* Furthermore, for each input record of this {@code KStream} that does not satisfy the join predicate the provided
* {@link ValueJoiner} will be called with a {@code null} value for the other stream.
* If an input record key or value is {@code null} the record will not be included in the join operation and thus no
* output record will be added to the resulting {@code KStream}.
*
* Example (assuming all input records belong to the correct windows):
*
* this      | other     | result
* ----------+-----------+--------------------------
* <K1:A>    |           | <K1:ValueJoiner(A,null)>
* <K2:B>    | <K2:b>    | <K2:ValueJoiner(B,b)>
*           | <K3:c>    |
*
* Both input streams (or to be more precise, their underlying source topics) need to have the same number of
* partitions.
* If this is not the case, you would need to call {@link #repartition(Repartitioned)} (for one input stream) before
* doing the join and specify the "correct" number of partitions via {@link Repartitioned} parameter.
* Furthermore, both input streams need to be co-partitioned on the join key (i.e., use the same partitioner).
* If this requirement is not met, Kafka Streams will automatically repartition the data, i.e., it will create an
* internal repartitioning topic in Kafka and write and re-read the data via this topic before the actual join.
* The repartitioning topic will be named "${applicationId}-<name>-repartition", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "<name>" is an internally generated
* name, and "-repartition" is a fixed suffix.
*
* Repartitioning can happen for one or both of the joining {@code KStream}s.
* For this case, all data of the stream will be redistributed through the repartitioning topic by writing all
* records to it, and rereading all records from it, such that the join input {@code KStream} is partitioned
* correctly on its key.
*
* Both of the joining {@code KStream}s will be materialized in local state stores with auto-generated store names.
* For failure and recovery each store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-<storename>-changelog", where "applicationId" is user-specified
* in {@link StreamsConfig} via parameter {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG},
* "storeName" is an internally generated name, and "-changelog" is a fixed suffix.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
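*
* For illustration only, a sketch where every click produces a result even if no matching impression
* arrives within the window ({@code clicks} and {@code impressions} are hypothetical streams):
* <pre>{@code
* KStream<String, String> enriched = clicks.leftJoin(
*     impressions,
*     (click, impression) -> click + "/" + (impression == null ? "none" : impression),
*     JoinWindows.of(Duration.ofMinutes(5)));
* }</pre>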
*
* @param otherStream the {@code KStream} to be joined with this stream
* @param joiner a {@link ValueJoiner} that computes the join result for a pair of matching records
* @param windows the specification of the {@link JoinWindows}
* @param <VO> the value type of the other stream
* @param <VR> the value type of the result stream
* @return a {@code KStream} that contains join-records for each key and values computed by the given
* {@link ValueJoiner}, one for each matched record-pair with the same key plus one for each non-matching record of
* this {@code KStream} and within the joining window intervals
* @see #join(KStream, ValueJoiner, JoinWindows)
* @see #outerJoin(KStream, ValueJoiner, JoinWindows)
*/
<VO, VR> KStream<K, VR> leftJoin(final KStream<K, VO> otherStream,
                                 final ValueJoiner<? super V, ? super VO, ? extends VR> joiner,
                                 final JoinWindows windows);
/**
* Join records of this stream with another {@code KStream}'s records using windowed left equi join with default
* serializers and deserializers.
* In contrast to {@link #join(KStream, ValueJoinerWithKey, JoinWindows) inner-join}, all records from this stream will
* produce at least one output record (cf. below).
* The join is computed on the records' key with join attribute {@code thisKStream.key == otherKStream.key}.
* Furthermore, two records are only joined if their timestamps are close to each other as defined by the given
* {@link JoinWindows}, i.e., the window defines an additional join predicate on the record timestamps.
*
* For each pair of records meeting both join predicates the provided {@link ValueJoinerWithKey} will be called to compute
* a value (with arbitrary type) for the result record.
* Note that the key is read-only and should not be modified, as this can lead to undefined behaviour.
* The key of the result record is the same as for both joining input records.
* Furthermore, for each input record of this {@code KStream} that does not satisfy the join predicate the provided
* {@link ValueJoinerWithKey} will be called with a {@code null} value for the other stream.
* If an input record key or value is {@code null} the record will not be included in the join operation and thus no
* output record will be added to the resulting {@code KStream}.
*
* Example (assuming all input records belong to the correct windows):
*
* this      | other     | result
* ----------+-----------+------------------------------------
* <K1:A>    |           | <K1:ValueJoinerWithKey(K1,A,null)>
* <K2:B>    | <K2:b>    | <K2:ValueJoinerWithKey(K2,B,b)>
*           | <K3:c>    |
*
* Both input streams (or to be more precise, their underlying source topics) need to have the same number of
* partitions.
* If this is not the case, you would need to call {@link #repartition(Repartitioned)} (for one input stream) before
* doing the join and specify the "correct" number of partitions via {@link Repartitioned} parameter.
* Furthermore, both input streams need to be co-partitioned on the join key (i.e., use the same partitioner).
* If this requirement is not met, Kafka Streams will automatically repartition the data, i.e., it will create an
* internal repartitioning topic in Kafka and write and re-read the data via this topic before the actual join.
* The repartitioning topic will be named "${applicationId}-<name>-repartition", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "<name>" is an internally generated
* name, and "-repartition" is a fixed suffix.
*
* Repartitioning can happen for one or both of the joining {@code KStream}s.
* For this case, all data of the stream will be redistributed through the repartitioning topic by writing all
* records to it, and rereading all records from it, such that the join input {@code KStream} is partitioned
* correctly on its key.
*
* Both of the joining {@code KStream}s will be materialized in local state stores with auto-generated store names.
* For failure and recovery each store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-<storename>-changelog", where "applicationId" is user-specified
* in {@link StreamsConfig} via parameter {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG},
* "storeName" is an internally generated name, and "-changelog" is a fixed suffix.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param otherStream the {@code KStream} to be joined with this stream
* @param joiner a {@link ValueJoinerWithKey} that computes the join result for a pair of matching records
* @param windows the specification of the {@link JoinWindows}
* @param <VO> the value type of the other stream
* @param <VR> the value type of the result stream
* @return a {@code KStream} that contains join-records for each key and values computed by the given
* {@link ValueJoinerWithKey}, one for each matched record-pair with the same key plus one for each non-matching record of
* this {@code KStream} and within the joining window intervals
* @see #join(KStream, ValueJoinerWithKey, JoinWindows)
* @see #outerJoin(KStream, ValueJoinerWithKey, JoinWindows)
*/
<VO, VR> KStream<K, VR> leftJoin(final KStream<K, VO> otherStream,
                                 final ValueJoinerWithKey<? super K, ? super V, ? super VO, ? extends VR> joiner,
                                 final JoinWindows windows);
/**
* Join records of this stream with another {@code KStream}'s records using windowed left equi join using the
* {@link StreamJoined} instance for configuration of the {@link Serde key serde}, {@link Serde this stream's value
* serde}, {@link Serde the other stream's value serde}, and used state stores.
* In contrast to {@link #join(KStream, ValueJoiner, JoinWindows) inner-join}, all records from this stream will
* produce at least one output record (cf. below).
* The join is computed on the records' key with join attribute {@code thisKStream.key == otherKStream.key}.
* Furthermore, two records are only joined if their timestamps are close to each other as defined by the given
* {@link JoinWindows}, i.e., the window defines an additional join predicate on the record timestamps.
*
* For each pair of records meeting both join predicates the provided {@link ValueJoiner} will be called to compute
* a value (with arbitrary type) for the result record.
* The key of the result record is the same as for both joining input records.
* Furthermore, for each input record of this {@code KStream} that does not satisfy the join predicate the provided
* {@link ValueJoiner} will be called with a {@code null} value for the other stream.
* If an input record key or value is {@code null} the record will not be included in the join operation and thus no
* output record will be added to the resulting {@code KStream}.
*
* Example (assuming all input records belong to the correct windows):
*
* this      | other     | result
* ----------+-----------+--------------------------
* <K1:A>    |           | <K1:ValueJoiner(A,null)>
* <K2:B>    | <K2:b>    | <K2:ValueJoiner(B,b)>
*           | <K3:c>    |
*
* Both input streams (or to be more precise, their underlying source topics) need to have the same number of
* partitions.
* If this is not the case, you would need to call {@link #repartition(Repartitioned)} (for one input stream) before
* doing the join and specify the "correct" number of partitions via {@link Repartitioned} parameter.
* Furthermore, both input streams need to be co-partitioned on the join key (i.e., use the same partitioner).
* If this requirement is not met, Kafka Streams will automatically repartition the data, i.e., it will create an
* internal repartitioning topic in Kafka and write and re-read the data via this topic before the actual join.
* The repartitioning topic will be named "${applicationId}-<name>-repartition", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "<name>" is an internally generated
* name, and "-repartition" is a fixed suffix.
*
* Repartitioning can happen for one or both of the joining {@code KStream}s.
* For this case, all data of the stream will be redistributed through the repartitioning topic by writing all
* records to it, and rereading all records from it, such that the join input {@code KStream} is partitioned
* correctly on its key.
*
* Both of the joining {@code KStream}s will be materialized in local state stores with auto-generated store names,
* unless a name is provided via a {@code StreamJoined} instance.
* For failure and recovery each store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-<storename>-changelog", where "applicationId" is user-specified
* in {@link StreamsConfig} via parameter {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG},
* "storeName" is an internally generated name, and "-changelog" is a fixed suffix.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param <VO> the value type of the other stream
* @param <VR> the value type of the result stream
* @param otherStream the {@code KStream} to be joined with this stream
* @param joiner a {@link ValueJoiner} that computes the join result for a pair of matching records
* @param windows the specification of the {@link JoinWindows}
* @param streamJoined a {@link StreamJoined} instance to configure serdes and state stores
* @return a {@code KStream} that contains join-records for each key and values computed by the given
* {@link ValueJoiner}, one for each matched record-pair with the same key plus one for each non-matching record of
* this {@code KStream} and within the joining window intervals
* @see #join(KStream, ValueJoiner, JoinWindows, StreamJoined)
* @see #outerJoin(KStream, ValueJoiner, JoinWindows, StreamJoined)
*/
<VO, VR> KStream<K, VR> leftJoin(final KStream<K, VO> otherStream,
                                 final ValueJoiner<? super V, ? super VO, ? extends VR> joiner,
                                 final JoinWindows windows,
                                 final StreamJoined<K, V, VO> streamJoined);
/**
* Join records of this stream with another {@code KStream}'s records using windowed left equi join using the
* {@link StreamJoined} instance for configuration of the {@link Serde key serde}, {@link Serde this stream's value
* serde}, {@link Serde the other stream's value serde}, and used state stores.
* In contrast to {@link #join(KStream, ValueJoinerWithKey, JoinWindows) inner-join}, all records from this stream will
* produce at least one output record (cf. below).
* The join is computed on the records' key with join attribute {@code thisKStream.key == otherKStream.key}.
* Furthermore, two records are only joined if their timestamps are close to each other as defined by the given
* {@link JoinWindows}, i.e., the window defines an additional join predicate on the record timestamps.
*
* For each pair of records meeting both join predicates the provided {@link ValueJoinerWithKey} will be called to compute
* a value (with arbitrary type) for the result record.
* Note that the key is read-only and should not be modified, as this can lead to undefined behaviour.
* The key of the result record is the same as for both joining input records.
* Furthermore, for each input record of this {@code KStream} that does not satisfy the join predicate the provided
* {@link ValueJoinerWithKey} will be called with a {@code null} value for the other stream.
* If an input record key or value is {@code null} the record will not be included in the join operation and thus no
* output record will be added to the resulting {@code KStream}.
*
* Example (assuming all input records belong to the correct windows):
*
* this      | other     | result
* ----------+-----------+------------------------------------
* <K1:A>    |           | <K1:ValueJoinerWithKey(K1,A,null)>
* <K2:B>    | <K2:b>    | <K2:ValueJoinerWithKey(K2,B,b)>
*           | <K3:c>    |
*
* Both input streams (or to be more precise, their underlying source topics) need to have the same number of
* partitions.
* If this is not the case, you would need to call {@link #repartition(Repartitioned)} (for one input stream) before
* doing the join and specify the "correct" number of partitions via {@link Repartitioned} parameter.
* Furthermore, both input streams need to be co-partitioned on the join key (i.e., use the same partitioner).
* If this requirement is not met, Kafka Streams will automatically repartition the data, i.e., it will create an
* internal repartitioning topic in Kafka and write and re-read the data via this topic before the actual join.
* The repartitioning topic will be named "${applicationId}-<name>-repartition", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "<name>" is an internally generated
* name, and "-repartition" is a fixed suffix.
*
* Repartitioning can happen for one or both of the joining {@code KStream}s.
* For this case, all data of the stream will be redistributed through the repartitioning topic by writing all
* records to it, and rereading all records from it, such that the join input {@code KStream} is partitioned
* correctly on its key.
*
* Both of the joining {@code KStream}s will be materialized in local state stores with auto-generated store names,
* unless a name is provided via a {@code StreamJoined} instance.
* For failure and recovery each store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-<storename>-changelog", where "applicationId" is user-specified
* in {@link StreamsConfig} via parameter {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG},
* "storeName" is an internally generated name, and "-changelog" is a fixed suffix.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param <VO> the value type of the other stream
* @param <VR> the value type of the result stream
* @param otherStream the {@code KStream} to be joined with this stream
* @param joiner a {@link ValueJoinerWithKey} that computes the join result for a pair of matching records
* @param windows the specification of the {@link JoinWindows}
* @param streamJoined a {@link StreamJoined} instance to configure serdes and state stores
* @return a {@code KStream} that contains join-records for each key and values computed by the given
* {@link ValueJoinerWithKey}, one for each matched record-pair with the same key plus one for each non-matching record of
* this {@code KStream} and within the joining window intervals
* @see #join(KStream, ValueJoinerWithKey, JoinWindows, StreamJoined)
* @see #outerJoin(KStream, ValueJoinerWithKey, JoinWindows, StreamJoined)
*/
<VO, VR> KStream<K, VR> leftJoin(final KStream<K, VO> otherStream,
                                 final ValueJoinerWithKey<? super K, ? super V, ? super VO, ? extends VR> joiner,
                                 final JoinWindows windows,
                                 final StreamJoined<K, V, VO> streamJoined);
/**
* Join records of this stream with another {@code KStream}'s records using windowed outer equi join with default
* serializers and deserializers.
* In contrast to {@link #join(KStream, ValueJoiner, JoinWindows) inner-join} or
* {@link #leftJoin(KStream, ValueJoiner, JoinWindows) left-join}, all records from both streams will produce at
* least one output record (cf. below).
* The join is computed on the records' key with join attribute {@code thisKStream.key == otherKStream.key}.
* Furthermore, two records are only joined if their timestamps are close to each other as defined by the given
* {@link JoinWindows}, i.e., the window defines an additional join predicate on the record timestamps.
*
* For each pair of records meeting both join predicates the provided {@link ValueJoiner} will be called to compute
* a value (with arbitrary type) for the result record.
* The key of the result record is the same as for both joining input records.
* Furthermore, for each input record of both {@code KStream}s that does not satisfy the join predicate the provided
* {@link ValueJoiner} will be called with a {@code null} value for the this/other stream, respectively.
* If an input record key or value is {@code null} the record will not be included in the join operation and thus no
* output record will be added to the resulting {@code KStream}.
*
* Example (assuming all input records belong to the correct windows):
*
* this      | other     | result
* ----------+-----------+---------------------------
* <K1:A>    |           | <K1:ValueJoiner(A,null)>
* <K2:B>    | <K2:b>    | <K2:ValueJoiner(null,b)>
*           |           | <K2:ValueJoiner(B,b)>
*           | <K3:c>    | <K3:ValueJoiner(null,c)>
*
* Both input streams (or to be more precise, their underlying source topics) need to have the same number of
* partitions.
* If this is not the case, you would need to call {@link #repartition(Repartitioned)} (for one input stream) before
* doing the join and specify the "correct" number of partitions via {@link Repartitioned} parameter.
* Furthermore, both input streams need to be co-partitioned on the join key (i.e., use the same partitioner).
* If this requirement is not met, Kafka Streams will automatically repartition the data, i.e., it will create an
* internal repartitioning topic in Kafka and write and re-read the data via this topic before the actual join.
* The repartitioning topic will be named "${applicationId}-<name>-repartition", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "<name>" is an internally generated
* name, and "-repartition" is a fixed suffix.
*
* Repartitioning can happen for one or both of the joining {@code KStream}s.
* For this case, all data of the stream will be redistributed through the repartitioning topic by writing all
* records to it, and rereading all records from it, such that the join input {@code KStream} is partitioned
* correctly on its key.
*
* Both of the joining {@code KStream}s will be materialized in local state stores with auto-generated store names.
* For failure and recovery each store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-<storename>-changelog", where "applicationId" is user-specified
* in {@link StreamsConfig} via parameter {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG},
* "storeName" is an internally generated name, and "-changelog" is a fixed suffix.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
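*
* For illustration only, a sketch where unmatched records from either side also produce a result
* ({@code clicks} and {@code impressions} are hypothetical streams):
* <pre>{@code
* KStream<String, String> merged = clicks.outerJoin(
*     impressions,
*     (click, impression) -> (click == null ? "no-click" : click)
*         + "/" + (impression == null ? "no-impression" : impression),
*     JoinWindows.of(Duration.ofMinutes(5)));
* }</pre>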
*
* @param otherStream the {@code KStream} to be joined with this stream
* @param joiner a {@link ValueJoiner} that computes the join result for a pair of matching records
* @param windows the specification of the {@link JoinWindows}
* @param <VO> the value type of the other stream
* @param <VR> the value type of the result stream
* @return a {@code KStream} that contains join-records for each key and values computed by the given
* {@link ValueJoiner}, one for each matched record-pair with the same key plus one for each non-matching record of
* both {@code KStream} and within the joining window intervals
* @see #join(KStream, ValueJoiner, JoinWindows)
* @see #leftJoin(KStream, ValueJoiner, JoinWindows)
*/
<VO, VR> KStream<K, VR> outerJoin(final KStream<K, VO> otherStream,
                                  final ValueJoiner<? super V, ? super VO, ? extends VR> joiner,
                                  final JoinWindows windows);
/**
* Join records of this stream with another {@code KStream}'s records using windowed outer equi join with default
* serializers and deserializers.
* In contrast to {@link #join(KStream, ValueJoinerWithKey, JoinWindows) inner-join} or
* {@link #leftJoin(KStream, ValueJoinerWithKey, JoinWindows) left-join}, all records from both streams will produce at
* least one output record (cf. below).
* The join is computed on the records' key with join attribute {@code thisKStream.key == otherKStream.key}.
* Furthermore, two records are only joined if their timestamps are close to each other as defined by the given
* {@link JoinWindows}, i.e., the window defines an additional join predicate on the record timestamps.
*
* For each pair of records meeting both join predicates the provided {@link ValueJoinerWithKey} will be called to compute
* a value (with arbitrary type) for the result record.
* Note that the key is read-only and should not be modified, as this can lead to undefined behaviour.
* The key of the result record is the same as for both joining input records.
* Furthermore, for each input record of both {@code KStream}s that does not satisfy the join predicate the provided
* {@link ValueJoinerWithKey} will be called with a {@code null} value for the this/other stream, respectively.
* If an input record key or value is {@code null} the record will not be included in the join operation and thus no
* output record will be added to the resulting {@code KStream}.
*
* Example (assuming all input records belong to the correct windows):
*
* this      | other     | result
* ----------+-----------+--------------------------------------
* <K1:A>    |           | <K1:ValueJoinerWithKey(K1,A,null)>
* <K2:B>    | <K2:b>    | <K2:ValueJoinerWithKey(K2,null,b)>
*           |           | <K2:ValueJoinerWithKey(K2,B,b)>
*           | <K3:c>    | <K3:ValueJoinerWithKey(K3,null,c)>
*
* Both input streams (or to be more precise, their underlying source topics) need to have the same number of
* partitions.
* If this is not the case, you would need to call {@link #repartition(Repartitioned)} (for one input stream) before
* doing the join and specify the "correct" number of partitions via {@link Repartitioned} parameter.
* Furthermore, both input streams need to be co-partitioned on the join key (i.e., use the same partitioner).
* If this requirement is not met, Kafka Streams will automatically repartition the data, i.e., it will create an
* internal repartitioning topic in Kafka and write and re-read the data via this topic before the actual join.
* The repartitioning topic will be named "${applicationId}-<name>-repartition", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "<name>" is an internally generated
* name, and "-repartition" is a fixed suffix.
*
* Repartitioning can happen for one or both of the joining {@code KStream}s.
* For this case, all data of the stream will be redistributed through the repartitioning topic by writing all
* records to it, and rereading all records from it, such that the join input {@code KStream} is partitioned
* correctly on its key.
*
* Both of the joining {@code KStream}s will be materialized in local state stores with auto-generated store names.
* For failure and recovery each store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-<storename>-changelog", where "applicationId" is user-specified
* in {@link StreamsConfig} via parameter {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG},
* "storeName" is an internally generated name, and "-changelog" is a fixed suffix.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param otherStream the {@code KStream} to be joined with this stream
* @param joiner a {@link ValueJoinerWithKey} that computes the join result for a pair of matching records
* @param windows the specification of the {@link JoinWindows}
* @param <VO> the value type of the other stream
* @param <VR> the value type of the result stream
* @return a {@code KStream} that contains join-records for each key and values computed by the given
* {@link ValueJoinerWithKey}, one for each matched record-pair with the same key plus one for each non-matching record of
* both {@code KStream} and within the joining window intervals
* @see #join(KStream, ValueJoinerWithKey, JoinWindows)
* @see #leftJoin(KStream, ValueJoinerWithKey, JoinWindows)
*/
<VO, VR> KStream<K, VR> outerJoin(final KStream<K, VO> otherStream,
                                  final ValueJoinerWithKey<? super K, ? super V, ? super VO, ? extends VR> joiner,
                                  final JoinWindows windows);
/**
* Join records of this stream with another {@code KStream}'s records using windowed outer equi join using the
* {@link StreamJoined} instance for configuration of the {@link Serde key serde}, {@link Serde this stream's value
* serde}, {@link Serde the other stream's value serde}, and used state stores.
* In contrast to {@link #join(KStream, ValueJoiner, JoinWindows) inner-join} or
* {@link #leftJoin(KStream, ValueJoiner, JoinWindows) left-join}, all records from both streams will produce at
* least one output record (cf. below).
* The join is computed on the records' key with join attribute {@code thisKStream.key == otherKStream.key}.
* Furthermore, two records are only joined if their timestamps are close to each other as defined by the given
* {@link JoinWindows}, i.e., the window defines an additional join predicate on the record timestamps.
*
* For each pair of records meeting both join predicates the provided {@link ValueJoiner} will be called to compute
* a value (with arbitrary type) for the result record.
* The key of the result record is the same as for both joining input records.
* Furthermore, for each input record of both {@code KStream}s that does not satisfy the join predicate the provided
* {@link ValueJoiner} will be called with a {@code null} value for this/other stream, respectively.
* If an input record key or value is {@code null} the record will not be included in the join operation and thus no
* output record will be added to the resulting {@code KStream}.
*
* Example (assuming all input records belong to the correct windows):
*
* this      | other     | result
* ----------+-----------+---------------------------
* <K1:A>    |           | <K1:ValueJoiner(A,null)>
* <K2:B>    | <K2:b>    | <K2:ValueJoiner(null,b)>
*           |           | <K2:ValueJoiner(B,b)>
*           | <K3:c>    | <K3:ValueJoiner(null,c)>
*
* Both input streams (or to be more precise, their underlying source topics) need to have the same number of
* partitions.
* If this is not the case, you would need to call {@link #repartition(Repartitioned)} (for one input stream) before
* doing the join and specify the "correct" number of partitions via {@link Repartitioned} parameter.
* Furthermore, both input streams need to be co-partitioned on the join key (i.e., use the same partitioner).
* If this requirement is not met, Kafka Streams will automatically repartition the data, i.e., it will create an
* internal repartitioning topic in Kafka and write and re-read the data via this topic before the actual join.
* The repartitioning topic will be named "${applicationId}-<name>-repartition", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "<name>" is an internally generated
* name, and "-repartition" is a fixed suffix.
*
* Repartitioning can happen for one or both of the joining {@code KStream}s.
* For this case, all data of the stream will be redistributed through the repartitioning topic by writing all
* records to it, and rereading all records from it, such that the join input {@code KStream} is partitioned
* correctly on its key.
*
* Both of the joining {@code KStream}s will be materialized in local state stores with auto-generated store names,
* unless a name is provided via a {@code StreamJoined} instance.
* For failure and recovery each store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-<storename>-changelog", where "applicationId" is user-specified
* in {@link StreamsConfig} via parameter {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG},
* "storeName" is an internally generated name, and "-changelog" is a fixed suffix.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param <VO> the value type of the other stream
* @param <VR> the value type of the result stream
* @param otherStream the {@code KStream} to be joined with this stream
* @param joiner a {@link ValueJoiner} that computes the join result for a pair of matching records
* @param windows the specification of the {@link JoinWindows}
* @param streamJoined a {@link StreamJoined} instance to configure serdes and state stores
* @return a {@code KStream} that contains join-records for each key and values computed by the given
* {@link ValueJoiner}, one for each matched record-pair with the same key plus one for each non-matching record of
* both {@code KStream} and within the joining window intervals
* @see #join(KStream, ValueJoiner, JoinWindows, StreamJoined)
* @see #leftJoin(KStream, ValueJoiner, JoinWindows, StreamJoined)
*/
<VO, VR> KStream<K, VR> outerJoin(final KStream<K, VO> otherStream,
                                  final ValueJoiner<? super V, ? super VO, ? extends VR> joiner,
                                  final JoinWindows windows,
                                  final StreamJoined<K, V, VO> streamJoined);
/**
* Join records of this stream with another {@code KStream}'s records using windowed outer equi join using the
* {@link StreamJoined} instance for configuration of the {@link Serde key serde}, {@link Serde this stream's value
* serde}, {@link Serde the other stream's value serde}, and used state stores.
* In contrast to {@link #join(KStream, ValueJoinerWithKey, JoinWindows) inner-join} or
* {@link #leftJoin(KStream, ValueJoinerWithKey, JoinWindows) left-join}, all records from both streams will produce at
* least one output record (cf. below).
* The join is computed on the records' key with join attribute {@code thisKStream.key == otherKStream.key}.
* Furthermore, two records are only joined if their timestamps are close to each other as defined by the given
* {@link JoinWindows}, i.e., the window defines an additional join predicate on the record timestamps.
*
* For each pair of records meeting both join predicates the provided {@link ValueJoinerWithKey} will be called to compute
* a value (with arbitrary type) for the result record.
* Note that the key is read-only and should not be modified, as this can lead to undefined behaviour.
* The key of the result record is the same as for both joining input records.
* Furthermore, for each input record of both {@code KStream}s that does not satisfy the join predicate the provided
* {@link ValueJoinerWithKey} will be called with a {@code null} value for this/other stream, respectively.
* If an input record key or value is {@code null} the record will not be included in the join operation and thus no
* output record will be added to the resulting {@code KStream}.
*
* Example (assuming all input records belong to the correct windows):
*
* this      | other     | result
* ----------+-----------+--------------------------------------
* <K1:A>    |           | <K1:ValueJoinerWithKey(K1,A,null)>
* <K2:B>    | <K2:b>    | <K2:ValueJoinerWithKey(K2,null,b)>
*           |           | <K2:ValueJoinerWithKey(K2,B,b)>
*           | <K3:c>    | <K3:ValueJoinerWithKey(K3,null,c)>
*
* Both input streams (or to be more precise, their underlying source topics) need to have the same number of
* partitions.
* If this is not the case, you would need to call {@link #repartition(Repartitioned)} (for one input stream) before
* doing the join and specify the "correct" number of partitions via {@link Repartitioned} parameter.
* Furthermore, both input streams need to be co-partitioned on the join key (i.e., use the same partitioner).
* If this requirement is not met, Kafka Streams will automatically repartition the data, i.e., it will create an
* internal repartitioning topic in Kafka and write and re-read the data via this topic before the actual join.
* The repartitioning topic will be named "${applicationId}-<name>-repartition", where "applicationId" is
* user-specified in {@link StreamsConfig} via parameter
* {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG}, "<name>" is an internally generated
* name, and "-repartition" is a fixed suffix.
*
* Repartitioning can happen for one or both of the joining {@code KStream}s.
* For this case, all data of the stream will be redistributed through the repartitioning topic by writing all
* records to it, and rereading all records from it, such that the join input {@code KStream} is partitioned
* correctly on its key.
*
* Both of the joining {@code KStream}s will be materialized in local state stores with auto-generated store names,
* unless a name is provided via a {@code StreamJoined} instance.
* For failure and recovery each store will be backed by an internal changelog topic that will be created in Kafka.
* The changelog topic will be named "${applicationId}-<storename>-changelog", where "applicationId" is user-specified
* in {@link StreamsConfig} via parameter {@link StreamsConfig#APPLICATION_ID_CONFIG APPLICATION_ID_CONFIG},
* "storeName" is an internally generated name, and "-changelog" is a fixed suffix.
*
* You can retrieve all generated internal topic names via {@link Topology#describe()}.
*
* @param <VO> the value type of the other stream
* @param <VR> the value type of the result stream
* @param otherStream the {@code KStream} to be joined with this stream
* @param joiner a {@link ValueJoinerWithKey} that computes the join result for a pair of matching records
* @param windows the specification of the {@link JoinWindows}
* @param streamJoined a {@link StreamJoined} instance to configure serdes and state stores
* @return a {@code KStream} that contains join-records for each key and values computed by the given
* {@link ValueJoinerWithKey}, one for each matched record-pair with the same key plus one for each non-matching record of
* both {@code KStream} and within the joining window intervals
* @see #join(KStream, ValueJoinerWithKey, JoinWindows, StreamJoined)
* @see #leftJoin(KStream, ValueJoinerWithKey, JoinWindows, StreamJoined)
*/
<VO, VR> KStream<K, VR> outerJoin(final KStream<K, VO> otherStream,
                                  final ValueJoinerWithKey<? super K, ? super V, ? super VO, ? extends VR> joiner,
                                  final JoinWindows windows,
                                  final StreamJoined<K, V, VO> streamJoined);
/**
* Join records of this stream with {@link KTable}'s records using non-windowed inner equi join with default
* serializers and deserializers.
* The join is a primary key table lookup join with join attribute {@code stream.key == table.key}.
* "Table lookup join" means, that results are only computed if {@code KStream} records are processed.
* This is done by performing a lookup for matching records in the current (i.e., processing time) internal
* {@link KTable} state.
* In contrast, processing {@link KTable} input records will only update the internal {@link KTable} state and
* will not produce any result records.
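*
* For illustration only, a sketch that enriches an order stream with customer data from a table
* (topic names are made up; both are keyed by a customer id and default String serdes are assumed):
* <pre>{@code
* KStream<String, String> orders = builder.stream("orders");
* KTable<String, String> customers = builder.table("customers");
* KStream<String, String> enrichedOrders = orders.join(
*     customers,
*     (order, customer) -> order + " placed by " + customer);
* }</pre>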
*
* For each {@code KStream} record that finds a corresponding record in {@link KTable} the provided
* {@link ValueJoiner} will be called to compute a value (with arbitrary type) for the result record.
* The key of the result record is the same as for both joining input records.
* If a {@code KStream} input record key or value is {@code null} the record will not be included in the join
* operation and thus no output record will be added to the resulting {@code KStream}.
*
* Example:
*
*
*