// All Downloads are FREE. Search and download functionalities are using the official Maven repository.
//
// org.apache.hadoop.hive.ql.exec.OperatorUtils Maven / Gradle / Ivy
//
// The newest version!
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.exec;

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.ql.exec.NodeUtils.Function;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.mapred.OutputCollector;

import com.google.common.collect.ImmutableMultimap;
import com.google.common.collect.Multimap;

public class OperatorUtils {

  private static final Log LOG = LogFactory.getLog(OperatorUtils.class);

  public static  Set findOperators(Operator start, Class clazz) {
    return findOperators(start, clazz, new HashSet());
  }

  public static  T findSingleOperator(Operator start, Class clazz) {
    Set found = findOperators(start, clazz, new HashSet());
    return found.size() == 1 ? found.iterator().next() : null;
  }

  public static  Set findOperators(Collection> starts, Class clazz) {
    Set found = new HashSet();
    for (Operator start : starts) {
      if (start == null) {
        continue;
      }
      findOperators(start, clazz, found);
    }
    return found;
  }

  @SuppressWarnings("unchecked")
  private static  Set findOperators(Operator start, Class clazz, Set found) {
    if (clazz.isInstance(start)) {
      found.add((T) start);
    }
    if (start.getChildOperators() != null) {
      for (Operator child : start.getChildOperators()) {
        findOperators(child, clazz, found);
      }
    }
    return found;
  }

  public static  Set findOperatorsUpstream(Operator start, Class clazz) {
    return findOperatorsUpstream(start, clazz, new HashSet());
  }

  public static  T findSingleOperatorUpstream(Operator start, Class clazz) {
    Set found = findOperatorsUpstream(start, clazz, new HashSet());
    return found.size() == 1 ? found.iterator().next() : null;
  }

  public static  Set findOperatorsUpstream(Collection> starts, Class clazz) {
    Set found = new HashSet();
    for (Operator start : starts) {
      findOperatorsUpstream(start, clazz, found);
    }
    return found;
  }

  @SuppressWarnings("unchecked")
  private static  Set findOperatorsUpstream(Operator start, Class clazz, Set found) {
    if (clazz.isInstance(start)) {
      found.add((T) start);
    }
    if (start.getParentOperators() != null) {
      for (Operator parent : start.getParentOperators()) {
        findOperatorsUpstream(parent, clazz, found);
      }
    }
    return found;
  }

  public static void setChildrenCollector(List> childOperators, OutputCollector out) {
    if (childOperators == null) {
      return;
    }
    for (Operator op : childOperators) {
      if (op.getName().equals(ReduceSinkOperator.getOperatorName())) {
        op.setOutputCollector(out);
      } else {
        setChildrenCollector(op.getChildOperators(), out);
      }
    }
  }

  public static void setChildrenCollector(List> childOperators, Map outMap) {
    if (childOperators == null) {
      return;
    }
    for (Operator op : childOperators) {
      if(op.getName().equals(ReduceSinkOperator.getOperatorName())) {
        ReduceSinkOperator rs = ((ReduceSinkOperator)op);
        if (outMap.containsKey(rs.getConf().getOutputName())) {
          LOG.info("Setting output collector: " + rs + " --> "
            + rs.getConf().getOutputName());
          rs.setOutputCollector(outMap.get(rs.getConf().getOutputName()));
        }
      } else {
        setChildrenCollector(op.getChildOperators(), outMap);
      }
    }
  }

  /**
   * Starting at the input operator, finds the last operator in the stream that
   * is an instance of the input class.
   *
   * @param op the starting operator
   * @param clazz the class that the operator that we are looking for instantiates
   * @return null if no such operator exists or multiple branches are found in
   * the stream, the last operator otherwise
   */
  @SuppressWarnings("unchecked")
  public static  T findLastOperator(Operator op, Class clazz) {
    Operator currentOp = op;
    T lastOp = null;
    while (currentOp != null) {
      if (clazz.isInstance(currentOp)) {
        lastOp = (T) currentOp;
      }
      if (currentOp.getChildOperators().size() == 1) {
        currentOp = currentOp.getChildOperators().get(0);
      }
      else {
        currentOp = null;
      }
    }
    return lastOp;
  }

  /**
   * Starting at the input operator, finds the last operator upstream that is
   * an instance of the input class.
   *
   * @param op the starting operator
   * @param clazz the class that the operator that we are looking for instantiates
   * @return null if no such operator exists or multiple branches are found in
   * the stream, the last operator otherwise
   */
  @SuppressWarnings("unchecked")
  public static  T findLastOperatorUpstream(Operator op, Class clazz) {
    Operator currentOp = op;
    T lastOp = null;
    while (currentOp != null) {
      if (clazz.isInstance(currentOp)) {
        lastOp = (T) currentOp;
      }
      if (currentOp.getParentOperators().size() == 1) {
        currentOp = currentOp.getParentOperators().get(0);
      }
      else {
        currentOp = null;
      }
    }
    return lastOp;
  }

  public static void iterateParents(Operator operator, Function> function) {
    iterateParents(operator, function, new HashSet>());
  }

  private static void iterateParents(Operator operator, Function> function, Set> visited) {
    if (!visited.add(operator)) {
      return;
    }
    function.apply(operator);
    if (operator.getNumParent() > 0) {
      for (Operator parent : operator.getParentOperators()) {
        iterateParents(parent, function, visited);
      }
    }
  }

  public static boolean sameRowSchema(Operator operator1, Operator operator2) {
	return operator1.getSchema().equals(operator2.getSchema());
  }

  /**
   * Given an operator and a set of classes, it classifies the operators it finds
   * in the stream depending on the classes they instantiate.
   *
   * If a given operator object is an instance of more than one of the input classes,
   * e.g. the operator instantiates one of the classes in the input set that is a
   * subclass of another class in the set, the operator will be associated to both
   * classes in the output map.
   *
   * @param start the start operator
   * @param classes the set of classes
   * @return a multimap from each of the classes to the operators that instantiate
   * them
   */
  public static Multimap>, Operator> classifyOperators(
        Operator start, Set>> classes) {
    ImmutableMultimap.Builder>, Operator> resultMap =
          new ImmutableMultimap.Builder>, Operator>();
    List> ops = new ArrayList>();
    ops.add(start);
    while (!ops.isEmpty()) {
      List> allChildren = new ArrayList>();
      for (Operator op: ops) {
        for (Class> clazz: classes) {
          if (clazz.isInstance(op)) {
            resultMap.put(clazz, op);
          }
        }
        allChildren.addAll(op.getChildOperators());
      }
      ops = allChildren;
    }
    return resultMap.build();
  }

  /**
   * Given an operator and a set of classes, it classifies the operators it finds
   * upstream depending on the classes it instantiates.
   *
   * If a given operator object is an instance of more than one of the input classes,
   * e.g. the operator instantiates one of the classes in the input set that is a
   * subclass of another class in the set, the operator will be associated to both
   * classes in the output map.
   *
   * @param start the start operator
   * @param classes the set of classes
   * @return a multimap from each of the classes to the operators that instantiate
   * them
   */
  public static Multimap>, Operator> classifyOperatorsUpstream(
        Operator start, Set>> classes) {
    ImmutableMultimap.Builder>, Operator> resultMap =
          new ImmutableMultimap.Builder>, Operator>();
    List> ops = new ArrayList>();
    ops.add(start);
    while (!ops.isEmpty()) {
      List> allParent = new ArrayList>();
      for (Operator op: ops) {
        for (Class> clazz: classes) {
          if (clazz.isInstance(op)) {
            resultMap.put(clazz, op);
          }
        }
        if (op.getParentOperators() != null) {
          allParent.addAll(op.getParentOperators());
        }
      }
      ops = allParent;
    }
    return resultMap.build();
  }
}




// © 2015 - 2024 Weber Informatics LLC | Privacy Policy