All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.ray.streaming.api.stream.DataStream Maven / Gradle / Ivy

There is a newer version: 1.10.0
Show newest version
package io.ray.streaming.api.stream;

import io.ray.streaming.api.Language;
import io.ray.streaming.api.context.StreamingContext;
import io.ray.streaming.api.function.impl.FilterFunction;
import io.ray.streaming.api.function.impl.FlatMapFunction;
import io.ray.streaming.api.function.impl.KeyFunction;
import io.ray.streaming.api.function.impl.MapFunction;
import io.ray.streaming.api.function.impl.SinkFunction;
import io.ray.streaming.api.partition.Partition;
import io.ray.streaming.api.partition.impl.BroadcastPartition;
import io.ray.streaming.operator.StreamOperator;
import io.ray.streaming.operator.impl.FilterOperator;
import io.ray.streaming.operator.impl.FlatMapOperator;
import io.ray.streaming.operator.impl.KeyByOperator;
import io.ray.streaming.operator.impl.MapOperator;
import io.ray.streaming.operator.impl.SinkOperator;
import io.ray.streaming.python.stream.PythonDataStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

/**
 * Represents a stream of data.
 *
 * 

This class defines all the streaming operations. * * @param Type of data in the stream. */ public class DataStream extends Stream, T> { public DataStream(StreamingContext streamingContext, StreamOperator streamOperator) { super(streamingContext, streamOperator); } public DataStream( StreamingContext streamingContext, StreamOperator streamOperator, Partition partition) { super(streamingContext, streamOperator, partition); } public DataStream(DataStream input, StreamOperator streamOperator) { super(input, streamOperator); } public DataStream( DataStream input, StreamOperator streamOperator, Partition partition) { super(input, streamOperator, partition); } /** * Create a java stream that reference passed python stream. Changes in new stream will be * reflected in referenced stream and vice versa */ public DataStream(PythonDataStream referencedStream) { super(referencedStream); } /** * Apply a map function to this stream. * * @param mapFunction The map function. * @param Type of data returned by the map function. * @return A new DataStream. */ public DataStream map(MapFunction mapFunction) { return new DataStream<>(this, new MapOperator<>(mapFunction)); } /** * Apply a flat-map function to this stream. * * @param flatMapFunction The FlatMapFunction * @param Type of data returned by the flatmap function. * @return A new DataStream */ public DataStream flatMap(FlatMapFunction flatMapFunction) { return new DataStream<>(this, new FlatMapOperator<>(flatMapFunction)); } public DataStream filter(FilterFunction filterFunction) { return new DataStream<>(this, new FilterOperator<>(filterFunction)); } /** * Apply union transformations to this stream by merging {@link DataStream} outputs of the same * type with each other. * * @param stream The DataStream to union output with. * @param others The other DataStreams to union output with. * @return A new UnionStream. */ @SafeVarargs public final DataStream union(DataStream stream, DataStream... others) { List> streams = new ArrayList<>(); streams.add(stream); streams.addAll(Arrays.asList(others)); return union(streams); } /** * Apply union transformations to this stream by merging {@link DataStream} outputs of the same * type with each other. * * @param streams The DataStreams to union output with. * @return A new UnionStream. */ public final DataStream union(List> streams) { if (this instanceof UnionStream) { UnionStream unionStream = (UnionStream) this; streams.forEach(unionStream::addStream); return unionStream; } else { return new UnionStream<>(this, streams); } } /** * Apply a join transformation to this stream, with another stream. * * @param other Another stream. * @param The type of the other stream data. * @param The type of the data in the joined stream. * @return A new JoinStream. */ public JoinStream join(DataStream other) { return new JoinStream<>(this, other); } public DataStream process() { // TODO(zhenxuanpan): Need to add processFunction. return new DataStream(this, null); } /** * Apply a sink function and get a StreamSink. * * @param sinkFunction The sink function. * @return A new StreamSink. */ public DataStreamSink sink(SinkFunction sinkFunction) { return new DataStreamSink<>(this, new SinkOperator<>(sinkFunction)); } /** * Apply a key-by function to this stream. * * @param keyFunction the key function. * @param The type of the key. * @return A new KeyDataStream. */ public KeyDataStream keyBy(KeyFunction keyFunction) { checkPartitionCall(); return new KeyDataStream<>(this, new KeyByOperator<>(keyFunction)); } /** * Apply broadcast to this stream. * * @return This stream. */ public DataStream broadcast() { checkPartitionCall(); return setPartition(new BroadcastPartition<>()); } /** * Apply a partition to this stream. * * @param partition The partitioning strategy. * @return This stream. */ public DataStream partitionBy(Partition partition) { checkPartitionCall(); return setPartition(partition); } /** * If parent stream is a python stream, we can't call partition related methods in the java * stream. */ private void checkPartitionCall() { if (getInputStream() != null && getInputStream().getLanguage() == Language.PYTHON) { throw new RuntimeException( "Partition related methods can't be called on a " + "java stream if parent stream is a python stream."); } } /** * Convert this stream as a python stream. The converted stream and this stream are the same * logical stream, which has same stream id. Changes in converted stream will be reflected in this * stream and vice versa. */ public PythonDataStream asPythonStream() { return new PythonDataStream(this); } @Override public Language getLanguage() { return Language.JAVA; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy