Many resources are needed to download a project. Please understand that we have to cover our server costs. Thank you in advance. The project price is only $1.
You can buy this project and download/modify it as often as you want.
// Copyright 2017 Twitter. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.twitter.heron.streamlet.impl;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import java.util.logging.Logger;
import com.twitter.heron.api.topology.TopologyBuilder;
import com.twitter.heron.streamlet.JoinType;
import com.twitter.heron.streamlet.KeyValue;
import com.twitter.heron.streamlet.KeyedWindow;
import com.twitter.heron.streamlet.SerializableBiFunction;
import com.twitter.heron.streamlet.SerializableBinaryOperator;
import com.twitter.heron.streamlet.SerializableConsumer;
import com.twitter.heron.streamlet.SerializableFunction;
import com.twitter.heron.streamlet.SerializablePredicate;
import com.twitter.heron.streamlet.SerializableSupplier;
import com.twitter.heron.streamlet.SerializableTransformer;
import com.twitter.heron.streamlet.Sink;
import com.twitter.heron.streamlet.Source;
import com.twitter.heron.streamlet.Streamlet;
import com.twitter.heron.streamlet.WindowConfig;
import com.twitter.heron.streamlet.impl.streamlets.ConsumerStreamlet;
import com.twitter.heron.streamlet.impl.streamlets.FilterStreamlet;
import com.twitter.heron.streamlet.impl.streamlets.FlatMapStreamlet;
import com.twitter.heron.streamlet.impl.streamlets.GeneralReduceByKeyAndWindowStreamlet;
import com.twitter.heron.streamlet.impl.streamlets.JoinStreamlet;
import com.twitter.heron.streamlet.impl.streamlets.LogStreamlet;
import com.twitter.heron.streamlet.impl.streamlets.MapStreamlet;
import com.twitter.heron.streamlet.impl.streamlets.ReduceByKeyAndWindowStreamlet;
import com.twitter.heron.streamlet.impl.streamlets.RemapStreamlet;
import com.twitter.heron.streamlet.impl.streamlets.SinkStreamlet;
import com.twitter.heron.streamlet.impl.streamlets.SourceStreamlet;
import com.twitter.heron.streamlet.impl.streamlets.SupplierStreamlet;
import com.twitter.heron.streamlet.impl.streamlets.TransformStreamlet;
import com.twitter.heron.streamlet.impl.streamlets.UnionStreamlet;
/**
* A Streamlet is a (potentially unbounded) ordered collection of tuples.
* Streamlets originate from pub/sub systems (such as Pulsar/Kafka), or from
* static data (such as csv files, HDFS files), or for that matter any other
* source. They are also created by transforming existing Streamlets using
* operations such as map/flatMap, etc.
* Besides the tuples, a Streamlet has the following properties associated with it
* a) name. User assigned or system generated name to refer to the streamlet
* b) nPartitions. Number of partitions that the streamlet is composed of. Thus the
* ordering of the tuples in a Streamlet is wrt the tuples within a partition.
* This allows the system to distribute each partition to different nodes across the cluster.
* A number of transformations can be applied to Streamlets (like map/flatMap, etc.). Each
* of these transformations operates on every tuple of the Streamlet and produces a new
* Streamlet. One can think of a transformation attaching itself to the stream and processing
* each tuple as they go by. Thus the parallelism of any operator is implicitly determined
* by the number of partitions of the stream that it is operating on. If a particular
* transformation wants to operate at a different parallelism, one can repartition the
* Streamlet before doing the transformation.
*/
public abstract class StreamletImpl implements Streamlet {
// Class-level logger; defaultNameCalculator uses it to report generated stage names.
private static final Logger LOG = Logger.getLogger(StreamletImpl.class.getName());
// Name of this streamlet; stays null until setName() is called (setDefaultNameIfNone checks for null).
protected String name;
// Number of partitions; the constructor initializes it to -1 and setNumPartitions() only accepts values > 0.
protected int nPartitions;
// Child streamlets registered through addChild(); build() and allBuilt() walk this list.
// NOTE(review): the declared type reads "List>" -- its generic type arguments appear to have
// been lost in extraction; restore them from the original source before compiling.
private List> children;
// Set to true by build() once doBuild() has returned true for this streamlet.
private boolean built;
/**
 * Reports whether this streamlet itself has been built.
 * @return true once build() has marked this streamlet as built
 */
public boolean isBuilt() {
  return this.built;
}
public boolean allBuilt() {
if (!built) {
return false;
}
for (StreamletImpl> child : children) {
if (!child.allBuilt()) {
return false;
}
}
return true;
}
/**
 * Name prefixes, one per kind of streamlet. defaultNameCalculator combines a
 * prefix with a running number to form a default stage name.
 */
protected enum StreamletNamePrefix {
  CONSUMER("consumer"),
  FILTER("filter"),
  FLATMAP("flatmap"),
  REDUCE("reduceByKeyAndWindow"),
  JOIN("join"),
  LOGGER("logger"),
  MAP("map"),
  REMAP("remap"),
  SINK("sink"),
  SOURCE("generator"),
  SUPPLIER("supplier"),
  TRANSFORM("transform"),
  UNION("union");

  /** The text returned by toString(). */
  private final String text;

  StreamletNamePrefix(final String text) {
    this.text = text;
  }

  /**
   * @return the prefix text of this constant
   */
  @Override
  public String toString() {
    return text;
  }
}
/**
* Gets all the children of this streamlet.
* Children of a streamlet are streamlets that result from transformations of elements of
* this and potentially other streamlets.
* The internal list itself is returned, not a copy.
* NOTE(review): the return type reads "List>" -- its generic type arguments appear to have
* been lost in extraction; restore them from the original source before compiling.
* @return The child streamlets
*/
public List> getChildren() {
return children;
}
/**
 * Assigns a user supplied name to this Streamlet.
 * @param sName The name to use; must be non-null and non-empty after trimming
 * @return This Streamlet, so that calls can be chained
 * @throws IllegalArgumentException if sName is null or empty after trimming
 */
@Override
public Streamlet setName(String sName) {
  final boolean hasText = sName != null && !sName.trim().isEmpty();
  require(hasText, "Streamlet name cannot be null/blank");
  name = sName;
  return this;
}
/**
 * Returns the name of this Streamlet.
 * @return The current name, or null if none has been set yet
 */
@Override
public String getName() {
  return this.name;
}
/**
 * Makes sure this Streamlet has a name that is unique within stageNames.
 * When no name has been set, a default one is generated from the prefix.
 * The resulting name is then recorded in stageNames.
 * @param prefix The name prefix to use when a default name has to be generated
 * @param stageNames The collection of stage names created so far; the name of this
 *     streamlet is added to it
 * @throws RuntimeException if the name is already contained in stageNames
 */
protected void setDefaultNameIfNone(StreamletNamePrefix prefix, Set stageNames) {
  if (null == getName()) {
    setName(defaultNameCalculator(prefix, stageNames));
  }
  final String stageName = getName();
  // Set.add reports false, and leaves the set untouched, when the name is already there.
  if (!stageNames.add(stageName)) {
    throw new RuntimeException(String.format(
        "The stage name %s is used multiple times in the same topology", stageName));
  }
}
/**
 * Assigns the number of partitions of this Streamlet.
 * @param numPartitions The user assigned number of partitions; has to be greater than 0
 * @return This Streamlet, so that calls can be chained
 * @throws IllegalArgumentException if numPartitions is not greater than 0
 */
@Override
public Streamlet setNumPartitions(int numPartitions) {
  final boolean positive = numPartitions > 0;
  require(positive, "Streamlet's partitions number should be > 0");
  nPartitions = numPartitions;
  return this;
}
/**
 * Returns the number of partitions of this Streamlet.
 * @return The current partition count; -1 as long as only the constructor has set it
 */
@Override
public int getNumPartitions() {
  return this.nPartitions;
}
/**
 * Creates a streamlet that is not built, has no children and has a partition
 * count of -1. Only used by the implementors.
 */
protected StreamletImpl() {
  built = false;
  children = new LinkedList<>();
  nPartitions = -1;
}
public void build(TopologyBuilder bldr, Set stageNames) {
if (built) {
throw new RuntimeException("Logic Error While building " + getName());
}
if (doBuild(bldr, stageNames)) {
built = true;
for (StreamletImpl> streamlet : children) {
streamlet.build(bldr, stageNames);
}
}
}
/**
* This is the main interface that every Streamlet implementation should implement.
* The main tasks are generally to make sure that appropriate names/partitions are
* computed and to add a spout/bolt to the TopologyBuilder.
* @param bldr The topology builder passed in by build()
* @param stageNames The stage names passed in by build()
* @return true to have build() mark this streamlet as built and continue with its children;
* false to have build() stop without marking it
*/
protected abstract boolean doBuild(TopologyBuilder bldr, Set stageNames);
/**
 * Registers a streamlet as a child of this one by appending it to the child list.
 * @param child The streamlet to add
 */
public void addChild(StreamletImpl child) {
  this.children.add(child);
}
/**
 * Computes an unused default stage name. Candidates are the prefix followed by
 * 1, 2, 3, ...; the first one not contained in stageNames is logged and returned.
 * @param prefix The name prefix of the streamlet
 * @param stageNames The stage names that are already taken
 * @return The first candidate name that is not in stageNames
 */
private String defaultNameCalculator(StreamletNamePrefix prefix, Set stageNames) {
  int suffix = 1;
  String candidate = prefix.toString() + suffix;
  while (stageNames.contains(candidate)) {
    suffix++;
    candidate = prefix.toString() + suffix;
  }
  LOG.info("Calculated stage Name as " + candidate);
  return candidate;
}
/**
 * Creates a Streamlet whose elements come from a supplier function.
 * @param supplier The Supplier function to generate the elements
 * @return A new SupplierStreamlet constructed from the supplier
 */
static StreamletImpl createSupplierStreamlet(SerializableSupplier supplier) {
  StreamletImpl created = new SupplierStreamlet(supplier);
  return created;
}
/**
 * Creates a Streamlet whose elements come from a generator.
 * @param generator The Generator function to generate the elements
 * @return A new SourceStreamlet constructed from the generator
 */
static StreamletImpl createGeneratorStreamlet(Source generator) {
  StreamletImpl created = new SourceStreamlet(generator);
  return created;
}
/**
 * Creates a new Streamlet by applying mapFn to each element of this Streamlet.
 * The new Streamlet is registered as a child of this one.
 * @param mapFn The Map Function that should be applied to each element
 * @return The newly created MapStreamlet
 */
@Override
public Streamlet map(SerializableFunction mapFn) {
  final MapStreamlet mapped = new MapStreamlet<>(this, mapFn);
  addChild(mapped);
  return mapped;
}
/**
* Return a new Streamlet by applying flatMapFn to each element of this Streamlet and
* flattening the result. The new Streamlet is registered as a child of this one.
* NOTE(review): the parameter type below ends in a stray ">" -- its generic type arguments
* appear to have been lost in extraction; restore them from the original source.
* @param flatMapFn The FlatMap Function that should be applied to each element
* @return The newly created FlatMapStreamlet
*/
@Override
public Streamlet flatMap(
SerializableFunction> flatMapFn) {
FlatMapStreamlet retval = new FlatMapStreamlet<>(this, flatMapFn);
// Link the new streamlet into the child list so that build() reaches it.
addChild(retval);
return retval;
}
/**
 * Creates a new Streamlet that applies filterFn to each element of this Streamlet
 * and includes only those elements that satisfy it. The new Streamlet is
 * registered as a child of this one.
 * @param filterFn The filter Function that should be applied to each element
 * @return The newly created FilterStreamlet
 */
@Override
public Streamlet filter(SerializablePredicate filterFn) {
  final FilterStreamlet filtered = new FilterStreamlet<>(this, filterFn);
  addChild(filtered);
  return filtered;
}
/**
 * Returns an identity-mapped copy of this Streamlet whose partition count is
 * set to numPartitions. Same as map(identity).setNumPartitions(numPartitions).
 * @param numPartitions The partition count passed to setNumPartitions of the new Streamlet
 * @return The result of setNumPartitions on the identity-mapped Streamlet
 */
@Override
public Streamlet repartition(int numPartitions) {
  return map(element -> element).setNumPartitions(numPartitions);
}
/**
* A more generalized version of repartition where a user can determine which partitions
* any particular tuple should go to. The new Streamlet is registered as a child of this one.
* NOTE(review): the partitionFn parameter type below ends in a stray ">" -- its generic type
* arguments appear to have been lost in extraction; restore them from the original source.
* @param numPartitions The partition count passed to setNumPartitions of the new Streamlet
* @param partitionFn The function handed to the RemapStreamlet to pick the target partitions
* @return The newly created RemapStreamlet
*/
@Override
public Streamlet repartition(int numPartitions,
SerializableBiFunction> partitionFn) {
RemapStreamlet retval = new RemapStreamlet<>(this, partitionFn);
retval.setNumPartitions(numPartitions);
// Link the new streamlet into the child list so that build() reaches it.
addChild(retval);
return retval;
}
/**
* Clones the current Streamlet. It returns a list of numClones Streamlets where each
* Streamlet contains all the tuples of the current Streamlet. Every clone is produced by
* repartition(getNumPartitions()), so each one is also registered as a child of this Streamlet.
* NOTE(review): the return type and the local list type read "List>" -- their generic type
* arguments appear to have been lost in extraction; restore them from the original source.
* NOTE(review): if getNumPartitions() still returns the constructor default of -1, the
* setNumPartitions call inside repartition looks like it would reject it -- confirm that
* the partition count is always set before clone is used.
* @param numClones The number of clones to create; values below 1 yield an empty list
* @return The list holding the created clones
*/
@Override
public List> clone(int numClones) {
List> retval = new ArrayList<>();
for (int i = 0; i < numClones; ++i) {
retval.add(repartition(getNumPartitions()));
}
return retval;
}
/**
* Return a new Streamlet by inner joining 'this' streamlet with 'other' streamlet.
* The join is done over elements accumulated over a time window defined by windowCfg.
* The elements are compared using the thisKeyExtractor for this streamlet with the
* otherKeyExtractor for the other streamlet. On each matching pair, the joinFunction is applied.
* This overload delegates to the join variant that takes a JoinType, passing JoinType.INNER.
* NOTE(review): the return type below reads "Streamlet, T>>" -- its generic type arguments
* appear to have been lost in extraction; restore them from the original source.
* @param other The Streamlet that we are joining with.
* @param thisKeyExtractor The function applied to a tuple of this streamlet to get the key
* @param otherKeyExtractor The function applied to a tuple of the other streamlet to get the key
* @param windowCfg This is a specification of what kind of windowing strategy you like to
* have. Typical windowing strategies are sliding windows and tumbling windows
* @param joinFunction The join function that needs to be applied
* @return The Streamlet produced by the delegated join call
*/
@Override
public Streamlet, T>>
join(Streamlet other, SerializableFunction thisKeyExtractor,
SerializableFunction otherKeyExtractor, WindowConfig windowCfg,
SerializableBiFunction joinFunction) {
return join(other, thisKeyExtractor, otherKeyExtractor,
windowCfg, JoinType.INNER, joinFunction);
}
/**
* Return a new KVStreamlet by joining 'this' streamlet with 'other' streamlet. The type of joining
* is declared by the joinType parameter.
* The join is done over elements accumulated over a time window defined by windowCfg.
* The elements are compared using the thisKeyExtractor for this streamlet with the
* otherKeyExtractor for the other streamlet. On each matching pair, the joinFunction is applied.
* Types of joins {@link JoinType}
* The resulting streamlet is registered as a child of both this streamlet and the other one.
* NOTE(review): the return type below reads "Streamlet, T>>" -- its generic type arguments
* appear to have been lost in extraction; restore them from the original source.
* @param other The Streamlet that we are joining with; it has to be a StreamletImpl
* @param thisKeyExtractor The function applied to a tuple of this streamlet to get the key
* @param otherKeyExtractor The function applied to a tuple of the other streamlet to get the key
* @param windowCfg This is a specification of what kind of windowing strategy you like to
* have. Typical windowing strategies are sliding windows and tumbling windows
* @param joinType Type of Join. Options {@link JoinType}
* @param joinFunction The join function that needs to be applied
* @return The JoinStreamlet created by JoinStreamlet.createJoinStreamlet
* @throws ClassCastException if other is not a StreamletImpl
*/
@Override
public Streamlet, T>>
join(Streamlet other, SerializableFunction thisKeyExtractor,
SerializableFunction otherKeyExtractor, WindowConfig windowCfg,
JoinType joinType, SerializableBiFunction joinFunction) {
// addChild is declared on StreamletImpl, hence the downcast of the other input.
StreamletImpl joinee = (StreamletImpl) other;
JoinStreamlet retval = JoinStreamlet.createJoinStreamlet(
this, joinee, thisKeyExtractor, otherKeyExtractor, windowCfg, joinType, joinFunction);
// The join has two inputs, so it is linked below both of them.
addChild(retval);
joinee.addChild(retval);
return retval;
}
/**
* Return a new Streamlet accumulating tuples of this streamlet over a Window defined by
* windowCfg and applying reduceFn on those tuples.
* The new Streamlet is registered as a child of this one.
* NOTE(review): the return type below reads "Streamlet, V>>" -- its generic type arguments
* appear to have been lost in extraction; restore them from the original source.
* @param keyExtractor The function applied to a tuple of this streamlet to get the key
* @param valueExtractor The function applied to a tuple of this streamlet to extract the value
* to be reduced on
* @param windowCfg This is a specification of what kind of windowing strategy you like to have.
* Typical windowing strategies are sliding windows and tumbling windows
* @param reduceFn The reduce function that you want to apply to all the values of a key.
* @return The newly created ReduceByKeyAndWindowStreamlet
*/
@Override
public Streamlet, V>> reduceByKeyAndWindow(
SerializableFunction keyExtractor, SerializableFunction valueExtractor,
WindowConfig windowCfg, SerializableBinaryOperator reduceFn) {
ReduceByKeyAndWindowStreamlet retval =
new ReduceByKeyAndWindowStreamlet<>(this, keyExtractor, valueExtractor,
windowCfg, reduceFn);
// Link the new streamlet into the child list so that build() reaches it.
addChild(retval);
return retval;
}
/**
* Return a new Streamlet accumulating tuples of this streamlet over a Window defined by
* windowCfg and applying reduceFn on those tuples. For each window, the value identity is used
* as an initial value. All the matching tuples are reduced using reduceFn starting from this
* initial value.
* The new Streamlet is registered as a child of this one.
* NOTE(review): the return type below reads "Streamlet, T>>" -- its generic type arguments
* appear to have been lost in extraction; restore them from the original source.
* @param keyExtractor The function applied to a tuple of this streamlet to get the key
* @param windowCfg This is a specification of what kind of windowing strategy you like to have.
* Typical windowing strategies are sliding windows and tumbling windows
* @param identity The identity element is both the initial value inside the reduction window
* and the default result if there are no elements in the window
* @param reduceFn The reduce function takes two parameters: a partial result of the reduction
* and the next element of the stream. It returns a new partial result.
* @return The newly created GeneralReduceByKeyAndWindowStreamlet
*/
@Override
public Streamlet, T>> reduceByKeyAndWindow(
SerializableFunction keyExtractor, WindowConfig windowCfg,
T identity, SerializableBiFunction reduceFn) {
GeneralReduceByKeyAndWindowStreamlet retval =
new GeneralReduceByKeyAndWindowStreamlet<>(this, keyExtractor, windowCfg,
identity, reduceFn);
// Link the new streamlet into the child list so that build() reaches it.
addChild(retval);
return retval;
}
/**
* Returns a new Streamlet that is the union of this and the ‘other’ streamlet. Essentially
* the new streamlet will contain tuples belonging to both Streamlets
*/
@Override
public Streamlet union(Streamlet extends R> other) {
StreamletImpl extends R> joinee = (StreamletImpl extends R>) other;
UnionStreamlet retval = new UnionStreamlet<>(this, joinee);
addChild(retval);
joinee.addChild(retval);
return retval;
}
/**
 * Logs every element of the streamlet using the String.valueOf function.
 * A LogStreamlet is an empty streamlet, i.e. one that does not contain any
 * tuple, which is why nothing is returned. The LogStreamlet is registered as
 * a child of this streamlet.
 */
@Override
public void log() {
  final LogStreamlet logging = new LogStreamlet<>(this);
  addChild(logging);
}
/**
 * Applies the consumer function to every element of this streamlet. The
 * ConsumerStreamlet created for this is registered as a child of this streamlet.
 * @param consumer The user supplied consumer function that is invoked for each element
 */
@Override
public void consume(SerializableConsumer consumer) {
  final ConsumerStreamlet consuming = new ConsumerStreamlet<>(this, consumer);
  addChild(consuming);
}
/**
 * Uses the sink to consume every element of this streamlet. The SinkStreamlet
 * created for this is registered as a child of this streamlet.
 * @param sink The Sink that consumes
 */
@Override
public void toSink(Sink sink) {
  final SinkStreamlet sinking = new SinkStreamlet<>(this, sink);
  addChild(sinking);
}
/**
 * Returns a new Streamlet by applying the transformFunction on each element of this streamlet.
 * Before starting to cycle the transformFunction over the Streamlet, the open function is called.
 * This allows the transform Function to do any kind of initialization/loading, etc.
 * The new Streamlet is registered as a child of this one.
 * @param serializableTransformer The transformation function to be applied
 * @return Streamlet containing the output of the transformFunction
 */
@Override
public Streamlet transform(
    SerializableTransformer serializableTransformer) {
  final TransformStreamlet transformed =
      new TransformStreamlet<>(this, serializableTransformer);
  addChild(transformed);
  return transformed;
}
/**
 * Verifies a requirement and rejects the call if it does not hold.
 * The condition is taken as a primitive boolean, so it can never be null and is not boxed.
 * @param requirement The already evaluated condition that has to be true
 * @param errorMessage The message of the exception thrown on failure
 * @throws IllegalArgumentException if the requirement fails
 */
private static void require(boolean requirement, String errorMessage) {
  if (!requirement) {
    throw new IllegalArgumentException(errorMessage);
  }
}
}