org.apache.hadoop.hive.ql.exec.vector.VectorGroupKeyHelper Maven / Gradle / Ivy

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.exec.vector;

import java.io.IOException;
import java.util.Arrays;

import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.io.DataOutputBuffer;

/**
 * Class for copying the group key from an input batch to an output batch.
 */
public class VectorGroupKeyHelper extends VectorColumnSetInfo {

  public VectorGroupKeyHelper(int keyCount) {
    super(keyCount);
   }

  void init(VectorExpression[] keyExpressions) throws HiveException {
    // Inspect the output type of each key expression.
    for(int i=0; i < keyExpressions.length; ++i) {
      addKey(keyExpressions[i].getOutputType());
    }
    finishAdding();
  }

  /*
   * This helper method copies the group keys from one vectorized row batch to another,
   * but does not increment the outputBatch.size (i.e. the next output position).
   * 
   * It was designed for VectorGroupByOperator's sorted reduce group batch processing mode
   * to copy the group keys at startGroup.
   */
  public void copyGroupKey(VectorizedRowBatch inputBatch, VectorizedRowBatch outputBatch,
          DataOutputBuffer buffer) throws HiveException {
    for(int i = 0; i< longIndices.length; ++i) {
      int keyIndex = longIndices[i];
      LongColumnVector inputColumnVector = (LongColumnVector) inputBatch.cols[keyIndex];
      LongColumnVector outputColumnVector = (LongColumnVector) outputBatch.cols[keyIndex];

      // This vectorized code pattern says: 
      //    If the input batch has no nulls at all (noNulls is true) OR
      //    the input row is NOT NULL, copy the value.
      //
      //    Otherwise, we have a NULL input value.  The standard way to mark a NULL in the
      //    output batch is: turn off noNulls indicating there is at least one NULL in the batch
      //    and mark that row as NULL.
      //
      //    When a vectorized row batch is reset, noNulls is set to true and the isNull array
      //    is zeroed.
      //
      // We grab the key at index 0.  We don't care about selected or repeating since all keys
      // in the input batch are suppose to be the same.
      //
      if (inputColumnVector.noNulls || !inputColumnVector.isNull[0]) {
        outputColumnVector.vector[outputBatch.size] = inputColumnVector.vector[0];
      } else {
        outputColumnVector.noNulls = false;
        outputColumnVector.isNull[outputBatch.size] = true;
      }
    }
    for(int i=0;i