io.ray.streaming.jobgraph.JobGraphOptimizer

package io.ray.streaming.jobgraph;

import io.ray.streaming.api.Language;
import io.ray.streaming.api.partition.Partition;
import io.ray.streaming.api.partition.impl.ForwardPartition;
import io.ray.streaming.api.partition.impl.RoundRobinPartition;
import io.ray.streaming.operator.ChainStrategy;
import io.ray.streaming.operator.StreamOperator;
import io.ray.streaming.operator.chain.ChainedOperator;
import io.ray.streaming.python.PythonOperator;
import io.ray.streaming.python.PythonOperator.ChainedPythonOperator;
import io.ray.streaming.python.PythonPartition;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;
import org.apache.commons.lang3.tuple.Pair;

/**
 * Optimize job graph by chaining some operators so that some operators can be run in the same
 * thread.
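 *
 * <p>A minimal usage sketch (hypothetical caller code; assumes a {@code JobGraph} already built
 * via the streaming API):
 *
 * <pre>{@code
 * JobGraph optimized = new JobGraphOptimizer(jobGraph).optimize();
 * }</pre>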
 */
public class JobGraphOptimizer {

  private final JobGraph jobGraph;
  private Set<JobVertex> visited = new HashSet<>();
  // vertex id -> vertex
  private Map<Integer, JobVertex> vertexMap;
  private Map<JobVertex, Set<JobEdge>> outputEdgesMap;
  // head vertex id -> (merged vertex, the vertices merged into it)
  private Map<Integer, Pair<JobVertex, List<JobVertex>>> mergedVertexMap;

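  /**
   * Builds the lookup structures used by the chaining passes: a vertex-id-to-vertex map and a
   * vertex-to-output-edges map derived from the given job graph.
   */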
  public JobGraphOptimizer(JobGraph jobGraph) {
    this.jobGraph = jobGraph;
    vertexMap =
        jobGraph.getJobVertices().stream()
            .collect(Collectors.toMap(JobVertex::getVertexId, Function.identity()));
    outputEdgesMap =
        vertexMap.keySet().stream()
            .collect(
                Collectors.toMap(
                    id -> vertexMap.get(id),
                    id -> new HashSet<>(jobGraph.getVertexOutputEdges(id))));
    mergedVertexMap = new HashMap<>();
  }

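  /**
   * Runs the optimization: starting from each source vertex, traverses the graph depth-first,
   * merges chainable runs of vertices into single vertices, and rebuilds the edges between the
   * merged vertices.
   *
   * @return a new JobGraph with the same name and config, containing the merged vertices
   */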
  public JobGraph optimize() {
    // Depth-first traverse nodes from source to sink to merge vertices that can be chained
    // together.
    jobGraph
        .getSourceVertices()
        .forEach(
            vertex -> {
              List<JobVertex> verticesToMerge = new ArrayList<>();
              verticesToMerge.add(vertex);
              mergeVerticesRecursively(vertex, verticesToMerge);
            });

    List<JobVertex> vertices =
        mergedVertexMap.values().stream().map(Pair::getLeft).collect(Collectors.toList());

    return new JobGraph(jobGraph.getJobName(), jobGraph.getJobConfig(), vertices, createEdges());
  }

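  /**
   * Depth-first traversal helper. Extends {@code verticesToMerge} while successive vertices can
   * be chained; when the chain cannot be extended (or a sink is reached), merges the accumulated
   * vertices via {@link #mergeAndAddVertex} and starts a new chain at the succeeding vertex.
   */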
  private void mergeVerticesRecursively(JobVertex vertex, List<JobVertex> verticesToMerge) {
    if (!visited.contains(vertex)) {
      visited.add(vertex);
      Set<JobEdge> outputEdges = outputEdgesMap.get(vertex);
      if (outputEdges.isEmpty()) {
        mergeAndAddVertex(verticesToMerge);
      } else {
        outputEdges.forEach(
            edge -> {
              JobVertex succeedingVertex = vertexMap.get(edge.getTargetVertexId());
              if (canBeChained(vertex, succeedingVertex, edge)) {
                verticesToMerge.add(succeedingVertex);
                mergeVerticesRecursively(succeedingVertex, verticesToMerge);
              } else {
                mergeAndAddVertex(verticesToMerge);
                List<JobVertex> newMergedVertices = new ArrayList<>();
                newMergedVertices.add(succeedingVertex);
                mergeVerticesRecursively(succeedingVertex, newMergedVertices);
              }
            });
      }
    }
  }

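  /**
   * Merges a run of vertices into a single vertex. A single-element run is kept as-is; otherwise
   * the operators are wrapped into a chained operator (Java or Python, depending on the head
   * vertex's language) and the merged vertex reuses the head vertex's id, parallelism and type.
   */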
  private void mergeAndAddVertex(List<JobVertex> verticesToMerge) {
    JobVertex mergedVertex;
    JobVertex headVertex = verticesToMerge.get(0);
    Language language = headVertex.getLanguage();
    if (verticesToMerge.size() == 1) {
      // no chain
      mergedVertex = headVertex;
    } else {
      List<StreamOperator> operators =
          verticesToMerge.stream()
              .map(v -> vertexMap.get(v.getVertexId()).getStreamOperator())
              .collect(Collectors.toList());
      List<Map<String, String>> configs =
          verticesToMerge.stream()
              .map(v -> vertexMap.get(v.getVertexId()).getConfig())
              .collect(Collectors.toList());
      StreamOperator operator;
      if (language == Language.JAVA) {
        operator = ChainedOperator.newChainedOperator(operators, configs);
      } else {
        List<PythonOperator> pythonOperators =
            operators.stream().map(o -> (PythonOperator) o).collect(Collectors.toList());
        operator = new ChainedPythonOperator(pythonOperators, configs);
      }
      // Per-operator configs are carried inside the chained operator, so the merged vertex
      // itself gets an empty config map.
      mergedVertex =
          new JobVertex(
              headVertex.getVertexId(),
              headVertex.getParallelism(),
              headVertex.getVertexType(),
              operator,
              new HashMap<>());
    }

    mergedVertexMap.put(mergedVertex.getVertexId(), Pair.of(mergedVertex, verticesToMerge));
  }

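  /**
   * Rebuilds the edge list of the optimized graph: for each merged vertex, the output edges of
   * its tail vertex are redirected to connect merged vertices, converting forward partitions to
   * round-robin partitions via {@link #changePartition}.
   */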
  private List<JobEdge> createEdges() {
    List<JobEdge> edges = new ArrayList<>();
    mergedVertexMap.forEach(
        (id, pair) -> {
          JobVertex mergedVertex = pair.getLeft();
          List<JobVertex> mergedVertices = pair.getRight();
          JobVertex tailVertex = mergedVertices.get(mergedVertices.size() - 1);
          // Input edges are created when the corresponding upstream vertices are processed.
          if (outputEdgesMap.containsKey(tailVertex)) {
            outputEdgesMap
                .get(tailVertex)
                .forEach(
                    edge -> {
                      Pair<JobVertex, List<JobVertex>> downstreamPair =
                          mergedVertexMap.get(edge.getTargetVertexId());
                      // change ForwardPartition to RoundRobinPartition.
                      Partition partition = changePartition(edge.getPartition());
                      JobEdge newEdge =
                          new JobEdge(
                              mergedVertex.getVertexId(),
                              downstreamPair.getLeft().getVertexId(),
                              partition);
                      edges.add(newEdge);
                    });
          }
        });
    return edges;
  }

  /** Change ForwardPartition to RoundRobinPartition. */
  private Partition changePartition(Partition partition) {
    if (partition instanceof PythonPartition) {
      PythonPartition pythonPartition = (PythonPartition) partition;
      if (!pythonPartition.isConstructedFromBinary()
          && pythonPartition.getFunctionName().equals(PythonPartition.FORWARD_PARTITION_CLASS)) {
        return PythonPartition.RoundRobinPartition;
      } else {
        return partition;
      }
    } else {
      if (partition instanceof ForwardPartition) {
        return new RoundRobinPartition();
      } else {
        return partition;
      }
    }
  }

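  /**
   * Two adjacent vertices can be chained only if the preceding vertex has a single output edge,
   * the succeeding vertex has a single input edge, both have the same parallelism and language,
   * neither operator's chain strategy forbids it (NEVER on either side, or HEAD on the succeeding
   * side), and the connecting edge uses a forward partition.
   */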
  private boolean canBeChained(
      JobVertex precedingVertex, JobVertex succeedingVertex, JobEdge edge) {
    if (jobGraph.getVertexOutputEdges(precedingVertex.getVertexId()).size() > 1
        || jobGraph.getVertexInputEdges(succeedingVertex.getVertexId()).size() > 1) {
      return false;
    }
    if (precedingVertex.getParallelism() != succeedingVertex.getParallelism()) {
      return false;
    }
    if (precedingVertex.getStreamOperator().getChainStrategy() == ChainStrategy.NEVER
        || succeedingVertex.getStreamOperator().getChainStrategy() == ChainStrategy.NEVER
        || succeedingVertex.getStreamOperator().getChainStrategy() == ChainStrategy.HEAD) {
      return false;
    }
    if (precedingVertex.getLanguage() != succeedingVertex.getLanguage()) {
      return false;
    }
    Partition partition = edge.getPartition();
    if (!(partition instanceof PythonPartition)) {
      return partition instanceof ForwardPartition;
    } else {
      PythonPartition pythonPartition = (PythonPartition) partition;
      return !pythonPartition.isConstructedFromBinary()
          && pythonPartition.getFunctionName().equals(PythonPartition.FORWARD_PARTITION_CLASS);
    }
  }
}