All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.dstream.DStreamOperation Maven / Gradle / Ivy

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.dstream;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.stream.Stream;

import io.dstream.SerializableStreamAssets.SerFunction;
import io.dstream.function.KeyValueMappingFunction;

/**
 * Represents an assembled and final unit of work (i.e., execution stage) to be 
 * used by a target execution environment when building target specific 
 * execution (e.g., DAG in Tez or Spark).
* * It consists of all attributes required to build a target specific execution. * The two most important once are:
*
    *
  1. * The {@link Serializable} lambda expression which includes the lambda * expression provided by the user (see {@link #getStreamOperationFunction()}).
    * NOTE: While preserving end user's intentions, the final lambda * may be the result of a composition with other implicit or explicit lambdas * during optimization phase. *
  2. *
  3. * Dependent operations which is a {@link List} of individual {@link DStreamExecutionGraph} * that in essence represents another execution pipeline. Dependent operations * can only be present if this operation is performing some * type of streams combine operation (e.g., join, union, unionAll) *
  4. *
*/ public final class DStreamOperation { private final int id; private DStreamOperation parent; @SuppressWarnings("rawtypes") private SerFunction streamOperationFunction; private List operationNames; private List combinableExecutionGraphs; /** * Constructs this {@link DStreamOperation} with the given id. */ DStreamOperation(int id){ this(id, null); } /** * Constructs this {@link DStreamOperation} with the given id and parent * operation. */ DStreamOperation(int id, DStreamOperation parent) { this.parent = parent; this.operationNames = new ArrayList<>(); this.id = id; } /** * Returns true if this operation's function is an * instance of {@link AbstractStreamMergingFunction} */ public boolean isStreamsCombiner() { return this.streamOperationFunction instanceof AbstractStreamMergingFunction; } /** * Returns the id of this {@link DStreamOperation}. */ public int getId(){ return this.id; } /** * */ public String toString(){ return this.operationNames.toString(); } /** * Returns the {@link List} of combinable {@link DStreamExecutionGraph} * where each {@link DStreamExecutionGraph} implies some type of * combine functionality with current operation (i.e., join, union, unionAll etc.) */ public List getCombinableExecutionGraphs(){ return this.combinableExecutionGraphs == null ? Collections.emptyList() : Collections.unmodifiableList(this.combinableExecutionGraphs); } /** * Returns true if the last operation which composes this {@link DStreamOperation} * if {@link Ops#classify} */ public boolean isClassify(){ return Ops.classify.name().equals(this.getLastOperationName()); } /** * Returns true if this {@link DStreamOperation} represents a shuffle * operation (see {@link Ops#isShuffle(String)} */ public boolean isShuffle(){ if (this.operationNames.size() > 0){ String operationName = this.operationNames.get(0); return Ops.isShuffle(operationName); } return false; } /** * Returns a {@link SerFunction} to be applied on the localized {@link Stream} of data * processed by a target task. * It includes the lambda expression provided by the end user.
* NOTE: While preserving end user's intentions, the final function * may be the result of a composition with other implicit or explicit functions * during optimization phase (see {@link #addStreamOperationFunction(String, SerFunction)}). */ @SuppressWarnings("unchecked") public SerFunction, Stream> getStreamOperationFunction() { return this.streamOperationFunction; } /** * Returns the last operation which composes this {@link DStreamOperation}.
* For example: *
	 * DStream.ofType(String.class, "wc")
	 *     .flatMap(..)
	 *     .map(..)
	 *     .filter(..)
	 *     . . .
	 * 
* In the above, all three operations are composable-transformations * and will be composed into a single {@link DStreamOperation} with the list of * operation names - [flatMap, map, filter]. In the given scenario this method * will return 'filter' since it is the last operation name composing this * {@link DStreamOperation}. */ public String getLastOperationName() { return this.operationNames.size() > 0 ? this.operationNames.get(this.operationNames.size()-1) : null; } /** * Will add the given {@link SerFunction} to this {@link DStreamOperation} by * composing it with the previous function. If previous function is null * the given function becomes the root function of this operation.
* It also adds the given operationName to the list of operation names * which composes this {@link DStreamOperation}.
* The final (composed) function represents the function to applied on the * localized {@link Stream} of data processed by a target task. */ @SuppressWarnings("unchecked") void addStreamOperationFunction(String operationName, SerFunction function){ this.operationNames.add(operationName); this.streamOperationFunction = this.streamOperationFunction != null ? this.streamOperationFunction.andThen(function) : function; if (function instanceof KeyValueMappingFunction){ if ( ((KeyValueMappingFunction)function).aggregatesValues() ) { String lastOperationName = this.operationNames.get(this.operationNames.size()-1); lastOperationName = lastOperationName + "{reducingValues}"; this.operationNames.set(this.operationNames.size()-1, lastOperationName); } } } /** * Sets the given instance of {@link AbstractStreamMergingFunction} as the * function of this {@link DStreamOperation}. */ void setStreamsCombiner(String operationName, AbstractStreamMergingFunction streamsCombiner) { this.operationNames.add(operationName); this.streamOperationFunction = streamsCombiner; } /** * Returns {@link DStreamOperation} which is a parent to this operation. */ DStreamOperation getParent(){ return this.parent; } /** * Ads the given {@link DStreamExecutionGraph} to the {@link List} of combinable {@link DStreamExecutionGraph}. * Each added {@link DStreamExecutionGraph} implies some type of * combine functionality with current operation (i.e., join, union, unionAll etc.) */ void addCombinableExecutionGraph(DStreamExecutionGraph combinableExecutionGraph){ if (this.combinableExecutionGraphs == null){ this.combinableExecutionGraphs = new ArrayList<>(); } this.combinableExecutionGraphs.add(combinableExecutionGraph); } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy