/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.exec;

import java.io.Serializable;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Stack;
import java.util.concurrent.Future;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.exec.PTFPartition.PTFPartitionIterator;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
import org.apache.hadoop.hive.ql.plan.PTFDesc;
import org.apache.hadoop.hive.ql.plan.PTFDeserializer;
import org.apache.hadoop.hive.ql.plan.api.OperatorType;
import org.apache.hadoop.hive.ql.plan.ptf.PTFExpressionDef;
import org.apache.hadoop.hive.ql.plan.ptf.PTFInputDef;
import org.apache.hadoop.hive.ql.plan.ptf.PartitionDef;
import org.apache.hadoop.hive.ql.plan.ptf.PartitionedTableFunctionDef;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFLeadLag;
import org.apache.hadoop.hive.ql.udf.ptf.TableFunctionEvaluator;
import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;

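/**
 * PTFOperator: executes a chain of Partitioned Table Functions (for example
 * windowing functions). On the reduce side, rows are expected to arrive grouped
 * by the partition keys; this operator detects partition boundaries by comparing
 * key wrappers and drives the PTFInvocation chain accordingly. On the map side,
 * the entire input is treated as a single partition.
 */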
public class PTFOperator extends Operator<PTFDesc> implements Serializable {

  private static final long serialVersionUID = 1L;
  boolean isMapOperator;

  transient KeyWrapperFactory keyWrapperFactory;
  protected transient KeyWrapper currentKeys;
  protected transient KeyWrapper newKeys;
  /*
   * For map-side invocation of PTFs, we cannot rely on the currentKeys null check
   * to decide when to invoke startPartition in streaming mode. Hence this extra flag.
   */
  transient boolean firstMapRow;
  transient Configuration hiveConf;
  transient PTFInvocation ptfInvocation;

  /*
   * 1. Find out if the operator is invoked at Map-side or Reduce-side
   * 2. Get the deserialized QueryDef
   * 3. Reconstruct the transient variables in QueryDef
   * 4. Create input partition to store rows coming from previous operator
   */
  @Override
  protected Collection<Future<?>> initializeOp(Configuration jobConf) throws HiveException {
    Collection<Future<?>> result = super.initializeOp(jobConf);
    hiveConf = jobConf;
    isMapOperator = conf.isMapSide();

    reconstructQueryDef(hiveConf);

    if (isMapOperator) {
      PartitionedTableFunctionDef tDef = conf.getStartOfChain();
      outputObjInspector = tDef.getRawInputShape().getOI();
    } else {
      outputObjInspector = conf.getFuncDef().getOutputShape().getOI();
    }

    setupKeysWrapper(inputObjInspectors[0]);

    ptfInvocation = setupChain();
    ptfInvocation.initializeStreaming(jobConf, isMapOperator);
    firstMapRow = true;
    return result;
  }

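  /*
   * Flush the last accumulated Partition through the chain before closing:
   * its rows have been seen, but finishPartition has not yet been invoked for them.
   */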
  @Override
  protected void closeOp(boolean abort) throws HiveException {
    super.closeOp(abort);
    ptfInvocation.finishPartition();
    ptfInvocation.close();
  }

  @Override
  public void process(Object row, int tag) throws HiveException {
    if (!isMapOperator) {
      /*
       * check if the current row belongs to the current accumulated Partition:
       * - If not:
       *   - process the current Partition
       *   - reset the input Partition
       * - set currentKeys to newKeys if currentKeys is null or has changed.
       */
      newKeys.getNewKey(row, inputObjInspectors[0]);
      boolean keysAreEqual = (currentKeys != null && newKeys != null) ?
              newKeys.equals(currentKeys) : false;

      if (currentKeys != null && !keysAreEqual) {
        ptfInvocation.finishPartition();
      }

      if (currentKeys == null || !keysAreEqual) {
        ptfInvocation.startPartition();
        if (currentKeys == null) {
          currentKeys = newKeys.copyKey();
        } else {
          currentKeys.copyKey(newKeys);
        }
      }
    } else if ( firstMapRow ) {
      ptfInvocation.startPartition();
      firstMapRow = false;
    }

    ptfInvocation.processRow(row);
  }

  /**
   * Initialize the visitor to use the QueryDefDeserializer. Use the order
   * defined in QueryDefWalker to visit the QueryDef.
   *
   * @param hiveConf
   * @throws HiveException
   */
  protected void reconstructQueryDef(Configuration hiveConf) throws HiveException {

    PTFDeserializer dS =
        new PTFDeserializer(conf, (StructObjectInspector)inputObjInspectors[0], hiveConf);
    dS.initializePTFChain(conf.getFuncDef());
  }

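  /*
   * Build the KeyWrapper machinery used to detect partition boundaries.
   * The "current" key is stored as a standalone WRITABLE copy (see
   * ObjectInspectorCopyOption.WRITABLE below) because the incoming row objects
   * are reused by the upstream operator and cannot be held across rows.
   */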
  protected void setupKeysWrapper(ObjectInspector inputOI) throws HiveException {
    PartitionDef pDef = conf.getStartOfChain().getPartition();
    List<PTFExpressionDef> exprs = pDef.getExpressions();
    int numExprs = exprs.size();
    ExprNodeEvaluator[] keyFields = new ExprNodeEvaluator[numExprs];
    ObjectInspector[] keyOIs = new ObjectInspector[numExprs];
    ObjectInspector[] currentKeyOIs = new ObjectInspector[numExprs];

    for (int i = 0; i < numExprs; i++) {
      PTFExpressionDef exprDef = exprs.get(i);
      // evaluate each partition expression against the incoming row's OI
      keyFields[i] = ExprNodeEvaluatorFactory.get(exprDef.getExprNode());
      keyOIs[i] = keyFields[i].initialize(inputOI);
      currentKeyOIs[i] = ObjectInspectorUtils.getStandardObjectInspector(keyOIs[i],
          ObjectInspectorCopyOption.WRITABLE);
    }

    keyWrapperFactory = new KeyWrapperFactory(keyFields, keyOIs, currentKeyOIs);
    newKeys = keyWrapperFactory.getKeyWrapper();
  }

  @Override
  public String getName() {
    return getOperatorName();
  }

  public static String getOperatorName() {
    return "PTF";
  }

  @Override
  public OperatorType getType() {
    return OperatorType.PTF;
  }

  private PTFInvocation setupChain() {
    Stack<PartitionedTableFunctionDef> fnDefs = new Stack<PartitionedTableFunctionDef>();
    PTFInputDef iDef = conf.getFuncDef();

    while (iDef instanceof PartitionedTableFunctionDef) {
      fnDefs.push((PartitionedTableFunctionDef) iDef);
      iDef = ((PartitionedTableFunctionDef) iDef).getInput();
    }

    PTFInvocation curr = null, first = null;

    while(!fnDefs.isEmpty()) {
      PartitionedTableFunctionDef currFn = fnDefs.pop();
      curr = new PTFInvocation(curr, currFn.getTFunction());
      if ( first == null ) {
        first = curr;
      }
    }
    return first;
  }

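  /**
   * Give each lead/lag UDF in the plan direct access to the current partition's
   * iterator, so it can navigate to rows before/after the current row while
   * being evaluated.
   */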
  public static void connectLeadLagFunctionsToPartition(PTFDesc ptfDesc,
      PTFPartitionIterator<Object> pItr) throws HiveException {
    List<ExprNodeGenericFuncDesc> llFnDescs = ptfDesc.getLlInfo().getLeadLagExprs();
    if (llFnDescs == null) {
      return;
    }
    for (ExprNodeGenericFuncDesc llFnDesc : llFnDescs) {
      GenericUDFLeadLag llFn = (GenericUDFLeadLag) llFnDesc
          .getGenericUDF();
      llFn.setpItr(pItr);
    }
  }

  /*
   * Responsible for the flow of rows through the PTF Chain.
   * An Invocation wraps a TableFunction.
   * The PTFOp hands the chain each row through the processRow call.
   * It also notifies the chain of when a Partition starts/finishes.
   *
   * There are several combinations depending on
   * whether the TableFunction and its successor support Streaming or Batch mode.
   *
   * Combination 1: Streaming + Streaming
   * - Start Partition: invoke startPartition on tabFn.
   * - Process Row: invoke process Row on tabFn.
   *   Any output rows hand to next tabFn in chain or forward to next Operator.
   * - Finish Partition: invoke finishPartition on tabFn.
   *   Any output rows hand to next tabFn in chain or forward to next Operator.
   *
   * Combination 2: Streaming + Batch
   * same as Combination 1
   *
   * Combination 3: Batch + Batch
   * - Start Partition: create or reset the Input Partition for the tabFn
   *   caveat is: if prev is also batch and it is not providing an Output Iterator
   *   then we can just use its Output Partition.
   * - Process Row: collect row in Input Partition
   * - Finish Partition : invoke evaluate on tabFn on Input Partition
   *   If function gives an Output Partition: set it on next Invocation's Input Partition
   *   If function gives an Output Iterator: iterate and call processRow on next Invocation.
   *   For last Invocation in chain: forward rows to next Operator.
   *
   * Combination 4: Batch + Stream
   * Similar to Combination 3, except the Finish Partition behavior is slightly different:
   * - Finish Partition : invoke evaluate on tabFn on Input Partition
   *   iterate output rows: hand to next tabFn in chain or forward to next Operator.
   *
   */
  class PTFInvocation {

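    /*
     * prev/next link the Invocations into a doubly-linked chain. For a batch-mode
     * function, inputPart buffers the current partition's rows, and outputPart /
     * outputPartRowsItr carry its results; a streaming function bypasses these.
     */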
    PTFInvocation prev;
    PTFInvocation next;
    TableFunctionEvaluator tabFn;
    PTFPartition inputPart;
    PTFPartition outputPart;
    Iterator<Object> outputPartRowsItr;

    public PTFInvocation(PTFInvocation prev, TableFunctionEvaluator tabFn) {
      this.prev = prev;
      this.tabFn = tabFn;
      if ( prev != null ) {
        prev.next = this;
      }
    }

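    /*
     * A function provides its output as an iterator if it either streams (rows
     * flow through processRow) or can iterate its output over a buffered partition.
     */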
    boolean isOutputIterator() {
      return tabFn.canAcceptInputAsStream() || tabFn.canIterateOutput();
    }

    boolean isStreaming() {
      return tabFn.canAcceptInputAsStream();
    }

    void initializeStreaming(Configuration cfg, boolean isMapSide) throws HiveException {
      PartitionedTableFunctionDef tabDef = tabFn.getTableDef();
      PTFInputDef inputDef = tabDef.getInput();
      ObjectInspector inputOI = conf.getStartOfChain() == tabDef ?
          inputObjInspectors[0] : inputDef.getOutputShape().getOI();

      tabFn.initializeStreaming(cfg, (StructObjectInspector) inputOI, isMapSide);

      if ( next != null ) {
        next.initializeStreaming(cfg, isMapSide);
      }
    }

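    /*
     * Streaming functions are simply notified of the new partition. A batch
     * function (re)creates or resets its input buffer, unless the previous batch
     * Invocation hands over its whole Output Partition (see finishPartition).
     */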
    void startPartition() throws HiveException {
      if ( isStreaming() ) {
        tabFn.startPartition();
      } else {
        if ( prev == null || prev.isOutputIterator() ) {
          if ( inputPart == null ) {
            createInputPartition();
          } else {
            inputPart.reset();
          }
        }
      }
      if ( next != null ) {
        next.startPartition();
      }
    }

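    /*
     * A streaming function may emit output rows immediately; a batch function
     * just buffers the row until finishPartition.
     */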
    void processRow(Object row) throws HiveException {
      if ( isStreaming() ) {
        handleOutputRows(tabFn.processRow(row));
      } else {
        inputPart.append(row);
      }
    }

    void handleOutputRows(List<Object> outRows) throws HiveException {
      if ( outRows != null ) {
        for (Object orow : outRows ) {
          if ( next != null ) {
            next.processRow(orow);
          } else {
            forward(orow, outputObjInspector);
          }
        }
      }
    }

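    /*
     * At partition end: a streaming function may flush buffered rows; a batch
     * function is evaluated over its Input Partition, and its results are either
     * handed to the next Invocation wholesale (Output Partition), pushed
     * row-by-row (Output Iterator), or forwarded to the next Operator if this is
     * the last Invocation in the chain.
     */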
    void finishPartition() throws HiveException {
      if ( isStreaming() ) {
        handleOutputRows(tabFn.finishPartition());
      } else {
        if ( tabFn.canIterateOutput() ) {
          outputPartRowsItr = inputPart == null ? null :
            tabFn.iterator(inputPart.iterator());
        } else {
          outputPart = inputPart == null ? null : tabFn.execute(inputPart);
          outputPartRowsItr = outputPart == null ? null : outputPart.iterator();
        }
        if ( next != null ) {
          if (!next.isStreaming() && !isOutputIterator() ) {
            next.inputPart = outputPart;
          } else {
            if ( outputPartRowsItr != null ) {
              while(outputPartRowsItr.hasNext() ) {
                next.processRow(outputPartRowsItr.next());
              }
            }
          }
        }
      }

      if ( next != null ) {
        next.finishPartition();
      } else {
        if (!isStreaming() ) {
          if ( outputPartRowsItr != null ) {
            while(outputPartRowsItr.hasNext() ) {
              forward(outputPartRowsItr.next(), outputObjInspector);
            }
          }
        }
      }
    }

    /**
     * Create a new Partition.
     * A partition has 2 OIs: the OI for the rows being put in and the OI for the rows
     * coming out. You specify the output OI by giving the Serde to use to Serialize.
     * Typically these 2 OIs are the same; but not always. For the
     * first PTF in a chain the OI of the incoming rows is dictated by the Parent Op
     * to this PTFOp. The output OI from the Partition is typically LazyBinaryStruct, but
     * not always. In the case of Noop/NoopMap we keep the Structure the same as
     * what is given to us.
     * <p>
     * The Partition we want to create here is for feeding the first table function
     * in the chain. So for map-side processing use the Serde from the output Shape
     * of its InputDef. For reduce-side processing use the Serde from its
     * RawInputShape (the shape after map-side processing).
     *
     * @throws HiveException
     */
    private void createInputPartition() throws HiveException {
      PartitionedTableFunctionDef tabDef = tabFn.getTableDef();
      PTFInputDef inputDef = tabDef.getInput();
      ObjectInspector inputOI = conf.getStartOfChain() == tabDef ?
          inputObjInspectors[0] : inputDef.getOutputShape().getOI();

      SerDe serde = conf.isMapSide() ? tabDef.getInput().getOutputShape().getSerde() :
          tabDef.getRawInputShape().getSerde();
      StructObjectInspector outputOI = conf.isMapSide() ? tabDef.getInput().getOutputShape().getOI() :
          tabDef.getRawInputShape().getOI();

      inputPart = PTFPartition.create(conf.getCfg(),
          serde,
          (StructObjectInspector) inputOI,
          outputOI);
    }

    void close() {
      if ( inputPart != null ) {
        inputPart.close();
      }
      tabFn.close();
      if ( next != null ) {
        next.close();
      }
    }
  }
}