All downloads are free. The search and download features use the official Maven repository.

org.apache.hadoop.hive.ql.exec.vector.expressions.StringGroupConcatColCol Maven / Gradle / Ivy

There is a newer version: 4.0.0
Show newest version
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.exec.vector.expressions;

import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

/**
 * Vectorized instruction to concatenate two string columns and put
 * the output in a third column.
 *
 * <p>An output row is NULL whenever the corresponding row of either input
 * is NULL. When both inputs are repeating, or when either input is a
 * repeating NULL, the output is marked repeating and only entry 0 is written.
 */
public class StringGroupConcatColCol extends VectorExpression {
  private static final long serialVersionUID = 1L;
  private int colNum1;
  private int colNum2;
  private int outputColumn;

  /**
   * Creates the expression for the given batch column indexes.
   *
   * @param colNum1 index in {@code batch.cols} of the left-hand string column
   * @param colNum2 index in {@code batch.cols} of the right-hand string column
   * @param outputColumn index in {@code batch.cols} of the column receiving the result
   */
  public StringGroupConcatColCol(int colNum1, int colNum2, int outputColumn) {
    this();
    this.colNum1 = colNum1;
    this.colNum2 = colNum2;
    this.outputColumn = outputColumn;
  }

  /** Creates an expression whose column indexes are assigned later through the setters. */
  public StringGroupConcatColCol() {
    super();
  }

  /**
   * Concatenates the two input columns row by row into the output column.
   *
   * <p>Child expressions (if any) are evaluated first. An empty batch leaves
   * the output column untouched. Otherwise the output buffer is re-initialized
   * and the output's {@code noNulls}, {@code isRepeating}, {@code isNull[]}
   * and data entries are filled in for every qualifying row.
   *
   * @param batch the row batch holding the input and output columns
   */
  @Override
  public void evaluate(VectorizedRowBatch batch) {

    if (childExpressions != null) {
      super.evaluateChildren(batch);
    }

    BytesColumnVector inV1 = (BytesColumnVector) batch.cols[colNum1];
    BytesColumnVector inV2 = (BytesColumnVector) batch.cols[colNum2];
    BytesColumnVector outV = (BytesColumnVector) batch.cols[outputColumn];
    int n = batch.size;

    // return immediately if batch is empty
    if (n == 0) {
      return;
    }

    // prepare output buffer to accept results
    outV.initBuffer();

    /* We assume that there *may* be nulls in an input if !noNulls is true for it,
     * so the output can only be declared null-free when both inputs are.
     */
    outV.noNulls = inV1.noNulls && inV2.noNulls;

    // A repeating NULL on either side makes every output row NULL.
    if (isRepeatingNull(inV1) || isRepeatingNull(inV2)) {
      outV.isNull[0] = true;
      outV.isRepeating = true;

      // return as no further processing is needed
      return;
    }

    /* A repeating input that reaches this point is known to be non-NULL, and only
     * its entry 0 is meaningful, so its isNull[] must never be consulted per row.
     */
    boolean checkNulls1 = !inV1.noNulls && !inV1.isRepeating;
    boolean checkNulls2 = !inV2.noNulls && !inV2.isRepeating;

    if (inV1.isRepeating && inV2.isRepeating) {

      // All must be selected otherwise size would be zero; one entry describes every row.
      concatRow(0, inV1, checkNulls1, inV2, checkNulls2, outV);
      outV.isRepeating = true;
      return;
    }

    outV.isRepeating = false;
    if (batch.selectedInUse) {
      int[] sel = batch.selected;
      for (int j = 0; j != n; j++) {
        concatRow(sel[j], inV1, checkNulls1, inV2, checkNulls2, outV);
      }
    } else {
      for (int i = 0; i != n; i++) {
        concatRow(i, inV1, checkNulls1, inV2, checkNulls2, outV);
      }
    }
  }

  /**
   * Tells whether a vector stands for a single NULL value repeated for the whole batch.
   *
   * @param v input vector
   * @return true if {@code v} is repeating, may contain nulls, and its entry 0 is NULL
   */
  private static boolean isRepeatingNull(BytesColumnVector v) {
    return v.isRepeating && !v.noNulls && v.isNull[0];
  }

  /**
   * Computes one output row: records its NULL flag and, if it is not NULL,
   * stores the concatenation of the two input values.
   *
   * @param row output row index; also the input row index for non-repeating inputs
   * @param inV1 left-hand input vector
   * @param checkNulls1 true if {@code inV1.isNull[row]} must be consulted
   * @param inV2 right-hand input vector
   * @param checkNulls2 true if {@code inV2.isNull[row]} must be consulted
   * @param outV output vector
   */
  private static void concatRow(int row, BytesColumnVector inV1, boolean checkNulls1,
      BytesColumnVector inV2, boolean checkNulls2, BytesColumnVector outV) {
    boolean rowIsNull =
        (checkNulls1 && inV1.isNull[row]) || (checkNulls2 && inV2.isNull[row]);

    /* When the output may hold nulls, always write the flag (true or false) so that
     * a stale value left over from an earlier batch can never leak through.
     */
    if (!outV.noNulls) {
      outV.isNull[row] = rowIsNull;
    }
    if (rowIsNull) {
      return;
    }

    // A repeating input keeps its single value at index 0.
    int r1 = inV1.isRepeating ? 0 : row;
    int r2 = inV2.isRepeating ? 0 : row;
    outV.setConcat(row,
        inV1.vector[r1], inV1.start[r1], inV1.length[r1],
        inV2.vector[r2], inV2.start[r2], inV2.length[r2]);
  }

  /** Returns the index of the column that receives the concatenated result. */
  @Override
  public int getOutputColumn() {
    return outputColumn;
  }

  /** Returns the output type name, {@code "String_Family"}. */
  @Override
  public String getOutputType() {
    return "String_Family";
  }

  /** Returns the index of the left-hand input column. */
  public int getColNum1() {
    return colNum1;
  }

  /** Sets the index of the left-hand input column. */
  public void setColNum1(int colNum1) {
    this.colNum1 = colNum1;
  }

  /** Returns the index of the right-hand input column. */
  public int getColNum2() {
    return colNum2;
  }

  /** Sets the index of the right-hand input column. */
  public void setColNum2(int colNum2) {
    this.colNum2 = colNum2;
  }

  /** Sets the index of the column that receives the concatenated result. */
  public void setOutputColumn(int outputColumn) {
    this.outputColumn = outputColumn;
  }

  /**
   * Describes this expression as a projection taking two string-family
   * column arguments.
   */
  @Override
  public VectorExpressionDescriptor.Descriptor getDescriptor() {
    return (new VectorExpressionDescriptor.Builder())
        .setMode(
            VectorExpressionDescriptor.Mode.PROJECTION)
        .setNumArguments(2)
        .setArgumentTypes(
            VectorExpressionDescriptor.ArgumentType.STRING_FAMILY,
            VectorExpressionDescriptor.ArgumentType.STRING_FAMILY)
        .setInputExpressionTypes(
            VectorExpressionDescriptor.InputExpressionType.COLUMN,
            VectorExpressionDescriptor.InputExpressionType.COLUMN).build();
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy