All downloads are FREE. The search and download functionality uses the official Maven repository.

org.apache.hadoop.hive.ql.udf.generic.GenericUDAFVarianceSample Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.ql.udf.generic;

import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedUDAFs;
import org.apache.hadoop.hive.ql.exec.vector.expressions.aggregates.gen.*;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;

/**
 * Sample-variance aggregate function. It builds on GenericUDAFVariance and
 * hands out an evaluator whose terminate() method is overridden to produce
 * the sample (rather than the inherited) result.
 */
@Description(name = "var_samp",
    value = "_FUNC_(x) - Returns the sample variance of a set of numbers.\n"
          + "If applied to an empty set: NULL is returned.\n"
          + "If applied to a set with a single element: NULL is returned.\n"
          + "Otherwise it computes: (S2-S1*S1/N)/(N-1)")
public class GenericUDAFVarianceSample extends GenericUDAFVariance {

  /**
   * Validates the argument list and returns the sample-variance evaluator.
   *
   * @param parameters type information of the arguments passed to the function
   * @return a new {@link GenericUDAFVarianceSampleEvaluator}
   * @throws SemanticException (as UDFArgumentTypeException) when the number of
   *           arguments is not exactly one, when the argument is not a
   *           primitive type, or when its primitive category is not one of the
   *           accepted ones listed in the switch below
   */
  @Override
  public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters)
      throws SemanticException {
    final int argumentCount = parameters.length;
    if (argumentCount != 1) {
      // The reported argument index is that of the last argument supplied.
      throw new UDFArgumentTypeException(argumentCount - 1,
          "Exactly one argument is expected.");
    }

    final TypeInfo argument = parameters[0];
    final boolean isPrimitive =
        argument.getCategory() == ObjectInspector.Category.PRIMITIVE;
    if (!isPrimitive) {
      throw new UDFArgumentTypeException(0,
          "Only primitive type arguments are accepted but "
          + argument.getTypeName() + " is passed.");
    }

    final PrimitiveTypeInfo primitiveArgument = (PrimitiveTypeInfo) argument;
    switch (primitiveArgument.getPrimitiveCategory()) {
    case BYTE:
    case SHORT:
    case INT:
    case LONG:
    case FLOAT:
    case DOUBLE:
    case STRING:
    case TIMESTAMP:
    case DECIMAL:
      return new GenericUDAFVarianceSampleEvaluator();
    default:
      // Every other category (BOOLEAN and DATE included) is rejected.
      throw new UDFArgumentTypeException(0,
          "Only numeric or string type arguments are accepted but "
          + argument.getTypeName() + " is passed.");
    }
  }

  /**
   * Evaluator for the sample variance. Everything except terminate() is
   * inherited from GenericUDAFVarianceEvaluator.
   */
  @VectorizedUDAFs({
    VectorUDAFVarLong.class, VectorUDAFVarLongComplete.class,
    VectorUDAFVarDouble.class, VectorUDAFVarDoubleComplete.class,
    VectorUDAFVarDecimal.class, VectorUDAFVarDecimalComplete.class,
    VectorUDAFVarTimestamp.class, VectorUDAFVarTimestampComplete.class,
    VectorUDAFVarPartial2.class, VectorUDAFVarFinal.class})
  public static class GenericUDAFVarianceSampleEvaluator extends
      GenericUDAFVarianceEvaluator {

    /**
     * Calculates the sample-variance result as {@code variance / (count - 1)}.
     * Intended for count &gt; 1; kept public so that vectorization code (and
     * other callers) can use it.
     *
     * @param variance the dividend; terminate() passes the aggregation
     *          buffer's variance field here
     * @param count the count; terminate() passes the aggregation buffer's
     *          count field and only calls this method when it exceeds 1
     * @return {@code variance} divided by {@code count - 1}
     */
    public static double calculateVarianceSampleResult(double variance, long count) {
      final long divisor = count - 1;
      return variance / divisor;
    }

    /**
     * Produces the final result for the given aggregation buffer.
     *
     * @param agg the aggregation buffer; it is cast to StdAgg
     * @return null when the buffer's count is at most 1, otherwise the object
     *         returned by getResult() after it has been set to the sample
     *         variance
     * @throws HiveException declared by the overridden method
     */
    @Override
    public Object terminate(AggregationBuffer agg) throws HiveException {
      final StdAgg buffer = (StdAgg) agg;

      // Fewer than two values: there is no sample variance to report.
      if (buffer.count <= 1) {
        return null;
      }

      final double sampleVariance =
          calculateVarianceSampleResult(buffer.variance, buffer.count);
      getResult().set(sampleVariance);
      return getResult();
    }
  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy