All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils Maven / Gradle / Ivy

There is a newer version: 4.0.0
Show newest version
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.udf.generic;

import java.lang.reflect.Array;
import java.lang.reflect.Method;
import java.lang.reflect.ParameterizedType;
import java.lang.reflect.Type;
import java.util.HashMap;

import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.serde2.io.HiveCharWritable;
import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.IdentityConverter;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.VoidObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.Text;

/**
 * Util functions for GenericUDF classes.
 */
public final class GenericUDFUtils {
  /**
   * Checks if b is the first byte of a UTF-8 character.
   *
   */
  public static boolean isUtfStartByte(byte b) {
    return (b & 0xC0) != 0x80;
  }

  /**
   * This class helps to find the return ObjectInspector for a GenericUDF.
   *
   * In many cases like CASE and IF, the GenericUDF is returning a value out of
   * several possibilities. However these possibilities may not always have the
   * same ObjectInspector.
   *
   * This class will help detect whether all possibilities have exactly the same
   * ObjectInspector. If not, then we need to convert the Objects to the same
   * ObjectInspector.
   *
   * A special case is when some values are constant NULL. In this case we can
   * use the same ObjectInspector.
   */
  public static class ReturnObjectInspectorResolver {

    boolean allowTypeConversion;
    ObjectInspector returnObjectInspector;

    // We create converters beforehand, so that the converters can reuse the
    // same object for returning conversion results.
    HashMap converters;

    public ReturnObjectInspectorResolver() {
      this(false);
    }

    public ReturnObjectInspectorResolver(boolean allowTypeConversion) {
      this.allowTypeConversion = allowTypeConversion;
    }

    /**
     * Update returnObjectInspector and valueInspectorsAreTheSame based on the
     * ObjectInspector seen.
     *
     * @return false if there is a type mismatch
     */
    public boolean update(ObjectInspector oi) throws UDFArgumentTypeException {
      return update(oi, false);
    }

    /**
     * Update returnObjectInspector and valueInspectorsAreTheSame based on the
     * ObjectInspector seen for UnionAll.
     *
     * @return false if there is a type mismatch
     */
    public boolean updateForUnionAll(ObjectInspector oi) throws UDFArgumentTypeException {
      return update(oi, true);
    }

    /**
     * Update returnObjectInspector and valueInspectorsAreTheSame based on the
     * ObjectInspector seen.
     *
     * @return false if there is a type mismatch
     */
    private boolean update(ObjectInspector oi, boolean isUnionAll) throws UDFArgumentTypeException {
      if (oi instanceof VoidObjectInspector) {
        return true;
      }

      if (returnObjectInspector == null) {
        // The first argument, just set the return to be the standard
        // writable version of this OI.
        returnObjectInspector = ObjectInspectorUtils
            .getStandardObjectInspector(oi,
            ObjectInspectorCopyOption.WRITABLE);
        return true;
      }

      if (returnObjectInspector == oi) {
        // The new ObjectInspector is the same as the old one, directly return
        // true
        return true;
      }

      TypeInfo oiTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(oi);
      TypeInfo rTypeInfo = TypeInfoUtils
          .getTypeInfoFromObjectInspector(returnObjectInspector);
      if (oiTypeInfo == rTypeInfo) {
        // Convert everything to writable, if types of arguments are the same,
        // but ObjectInspectors are different.
        returnObjectInspector = ObjectInspectorUtils
            .getStandardObjectInspector(returnObjectInspector,
            ObjectInspectorCopyOption.WRITABLE);
        return true;
      }

      if (!allowTypeConversion) {
        return false;
      }

      // Types are different, we need to check whether we can convert them to
      // a common base class or not.
      TypeInfo commonTypeInfo = null;
      if (isUnionAll) {
        commonTypeInfo = FunctionRegistry.getCommonClassForUnionAll(oiTypeInfo,
          rTypeInfo);
      } else {
        commonTypeInfo = FunctionRegistry.getCommonClass(oiTypeInfo,
          rTypeInfo);
      }
      if (commonTypeInfo == null) {
        return false;
      }

      /**
       * TODO: Hack fix until HIVE-5848 is addressed. non-exact type shouldn't be promoted
       * to exact type, as FunctionRegistry.getCommonClass() might do. This corrects
       * that.
       */
      if (commonTypeInfo instanceof DecimalTypeInfo) {
        if ((!FunctionRegistry.isExactNumericType((PrimitiveTypeInfo) oiTypeInfo)) ||
            (!FunctionRegistry.isExactNumericType((PrimitiveTypeInfo) rTypeInfo))) {
          commonTypeInfo = TypeInfoFactory.doubleTypeInfo;
        }
      }

      returnObjectInspector = TypeInfoUtils
          .getStandardWritableObjectInspectorFromTypeInfo(commonTypeInfo);

      return true;
    }

    /**
     * Returns the ObjectInspector of the return value.
     */
    public ObjectInspector get() {
      return get(PrimitiveObjectInspectorFactory.javaVoidObjectInspector);
    }

    public ObjectInspector get(ObjectInspector defaultOI) {
      return returnObjectInspector != null ? returnObjectInspector : defaultOI;
    }

    public Object convertIfNecessary(Object o, ObjectInspector oi) {
      return convertIfNecessary(o, oi, true);
    }

    /**
     * Convert the return Object if necessary (when the ObjectInspectors of
     * different possibilities are not all the same). If reuse is true,
     * the result Object will be the same object as the last invocation
     * (as long as the oi is the same)
     */
    public Object convertIfNecessary(Object o, ObjectInspector oi, boolean reuse) {
      Object converted = null;
      if (oi == returnObjectInspector) {
        converted = o;
      } else {

        if (o == null) {
          return null;
        }

        Converter converter = null;
        if (reuse) {
	  if (converters == null) {
	    converters = new HashMap();
	  }
	  converter = converters.get(oi);
        }

        if (converter == null) {
          converter = ObjectInspectorConverters.getConverter(oi,
              returnObjectInspector);
          if (reuse) {
            converters.put(oi, converter);
          }
        }
        converted = converter.convert(o);
      }
      return converted;
    }

  }

  /**
   * Convert parameters for the method if needed.
   */
  public static class ConversionHelper {

    private final ObjectInspector[] givenParameterOIs;
    Type[] methodParameterTypes;
    private final boolean isVariableLengthArgument;
    Type lastParaElementType;

    boolean conversionNeeded;
    Converter[] converters;
    Object[] convertedParameters;
    Object[] convertedParametersInArray;

    private static Class getClassFromType(Type t) {
      if (t instanceof Class) {
        return (Class) t;
      } else if (t instanceof ParameterizedType) {
        ParameterizedType pt = (ParameterizedType) t;
        return (Class) pt.getRawType();
      }
      return null;
    }

    /**
     * Create a PrimitiveConversionHelper for Method m. The ObjectInspector's
     * input parameters are specified in parameters.
     */
    public ConversionHelper(Method m, ObjectInspector[] parameterOIs)
        throws UDFArgumentException {
      givenParameterOIs = parameterOIs;

      methodParameterTypes = m.getGenericParameterTypes();

      // Whether the method takes an array like Object[],
      // or String[] etc in the last argument.
      lastParaElementType = TypeInfoUtils
          .getArrayElementType(methodParameterTypes.length == 0 ? null
          : methodParameterTypes[methodParameterTypes.length - 1]);
      isVariableLengthArgument = (lastParaElementType != null);

      // Create the output OI array
      ObjectInspector[] methodParameterOIs = new ObjectInspector[parameterOIs.length];

      if (isVariableLengthArgument) {

        // ConversionHelper can be called without method parameter length
        // checkings
        // for terminatePartial() and merge() calls.
        if (parameterOIs.length < methodParameterTypes.length - 1) {
          throw new UDFArgumentLengthException(m.toString()
              + " requires at least " + (methodParameterTypes.length - 1)
              + " arguments but only " + parameterOIs.length
              + " are passed in.");
        }
        // Copy the first methodParameterTypes.length - 1 entries
        for (int i = 0; i < methodParameterTypes.length - 1; i++) {
          // This method takes Object, so it accepts whatever types that are
          // passed in.
          if (methodParameterTypes[i] == Object.class) {
            methodParameterOIs[i] = ObjectInspectorUtils
                .getStandardObjectInspector(parameterOIs[i],
                ObjectInspectorCopyOption.JAVA);
          } else {
            methodParameterOIs[i] = ObjectInspectorFactory
                .getReflectionObjectInspector(methodParameterTypes[i],
                ObjectInspectorOptions.JAVA);
          }
        }

        // Deal with the last entry
        if (lastParaElementType == Object.class) {
          // This method takes Object[], so it accepts whatever types that are
          // passed in.
          for (int i = methodParameterTypes.length - 1; i < parameterOIs.length; i++) {
            methodParameterOIs[i] = ObjectInspectorUtils
                .getStandardObjectInspector(parameterOIs[i],
                ObjectInspectorCopyOption.JAVA);
          }
        } else {
          // This method takes something like String[], so it only accepts
          // something like String
          ObjectInspector oi = ObjectInspectorFactory
              .getReflectionObjectInspector(lastParaElementType,
              ObjectInspectorOptions.JAVA);
          for (int i = methodParameterTypes.length - 1; i < parameterOIs.length; i++) {
            methodParameterOIs[i] = oi;
          }
        }

      } else {

        // Normal case, the last parameter is a normal parameter.
        // ConversionHelper can be called without method parameter length
        // checkings
        // for terminatePartial() and merge() calls.
        if (methodParameterTypes.length != parameterOIs.length) {
          throw new UDFArgumentLengthException(m.toString() + " requires "
              + methodParameterTypes.length + " arguments but "
              + parameterOIs.length + " are passed in.");
        }
        for (int i = 0; i < methodParameterTypes.length; i++) {
          // This method takes Object, so it accepts whatever types that are
          // passed in.
          if (methodParameterTypes[i] == Object.class) {
            methodParameterOIs[i] = ObjectInspectorUtils
                .getStandardObjectInspector(parameterOIs[i],
                ObjectInspectorCopyOption.JAVA);
          } else {
            methodParameterOIs[i] = ObjectInspectorFactory
                .getReflectionObjectInspector(methodParameterTypes[i],
                ObjectInspectorOptions.JAVA);
          }
        }
      }

      // Create the converters
      conversionNeeded = false;
      converters = new Converter[parameterOIs.length];
      for (int i = 0; i < parameterOIs.length; i++) {
        Converter pc = ObjectInspectorConverters.getConverter(parameterOIs[i],
            methodParameterOIs[i]);
        converters[i] = pc;
        // Conversion is needed?
        conversionNeeded = conversionNeeded
            || (!(pc instanceof IdentityConverter));
      }

      if (isVariableLengthArgument) {
        convertedParameters = new Object[methodParameterTypes.length];
        convertedParametersInArray = (Object[]) Array.newInstance(
            getClassFromType(lastParaElementType), parameterOIs.length
            - methodParameterTypes.length + 1);
        convertedParameters[convertedParameters.length - 1] = convertedParametersInArray;
      } else {
        convertedParameters = new Object[parameterOIs.length];
      }
    }

    public Object[] convertIfNecessary(Object... parameters) {

      assert (parameters.length == givenParameterOIs.length);

      if (!conversionNeeded && !isVariableLengthArgument) {
        // no conversion needed, and not variable-length argument:
        // just return what is passed in.
        return parameters;
      }

      if (isVariableLengthArgument) {
        // convert the first methodParameterTypes.length - 1 entries
        for (int i = 0; i < methodParameterTypes.length - 1; i++) {
          convertedParameters[i] = converters[i].convert(parameters[i]);
        }
        // convert the rest and put into the last entry
        for (int i = methodParameterTypes.length - 1; i < parameters.length; i++) {
          convertedParametersInArray[i + 1 - methodParameterTypes.length] = converters[i]
              .convert(parameters[i]);
        }
      } else {
        // normal case, convert all parameters
        for (int i = 0; i < methodParameterTypes.length; i++) {
          convertedParameters[i] = converters[i].convert(parameters[i]);
        }
      }
      return convertedParameters;
    }
  };

  /**
   * Helper class for UDFs returning string/varchar/char
   */
  public static class StringHelper {

    protected Object returnValue;
    protected PrimitiveCategory type;

    public StringHelper(PrimitiveCategory type) throws UDFArgumentException {
      this.type = type;
      switch (type) {
        case STRING:
          returnValue = new Text();
          break;
        case CHAR:
          returnValue = new HiveCharWritable();
          break;
        case VARCHAR:
          returnValue = new HiveVarcharWritable();
          break;
        default:
          throw new UDFArgumentException("Unexpected non-string type " + type);
      }
    }

    public Object setReturnValue(String val) throws UDFArgumentException {
      if (val == null) {
        return null;
      }
      switch (type) {
        case STRING:
          ((Text)returnValue).set(val);
          return returnValue;
        case CHAR:
          ((HiveCharWritable) returnValue).set(val);
          return returnValue;
        case VARCHAR:
          ((HiveVarcharWritable)returnValue).set(val);
          return returnValue;
        default:
          throw new UDFArgumentException("Bad return type " + type);
      }
    }

    /**
     * Helper function to help GenericUDFs determine the return type
     * character length for char/varchar.
     * @param poi PrimitiveObjectInspector representing the type
     * @return character length of the type
     * @throws UDFArgumentException
     */
    public static int getFixedStringSizeForType(PrimitiveObjectInspector poi)
        throws UDFArgumentException {
      // TODO: we can support date, int, .. any types which would have a fixed length value
      switch (poi.getPrimitiveCategory()) {
        case CHAR:
        case VARCHAR:
          BaseCharTypeInfo typeInfo = (BaseCharTypeInfo) poi.getTypeInfo();
          return typeInfo.getLength();
        default:
          throw new UDFArgumentException("No fixed size for type " + poi.getTypeName());
      }
    }

  }

  /**
   * Return an ordinal from an integer.
   */
  public static String getOrdinal(int i) {
    int unit = i % 10;
    return (i <= 0) ? "" : (i != 11 && unit == 1) ? i + "st"
        : (i != 12 && unit == 2) ? i + "nd" : (i != 13 && unit == 3) ? i + "rd"
        : i + "th";
  }

  /**
   * Finds any occurence of subtext from text in the
   * backing buffer.
   */
  public static int findText(Text text, Text subtext, int start) {
    // src.position(start) can't accept negative numbers.
    int length = text.getLength() - start;
    if (start < 0 || length < 0 || length < subtext.getLength()) {
      return -1;
    }
    if (subtext.getLength() == 0) {
      return 0;
    }
    if (length == 0) {
      return -1;
    }

    String textString = text.toString();
    String subtextString = subtext.toString();
    int index = textString.indexOf(subtextString, start);
    if (index == -1) {
      return index;
    } else {
      return textString.codePointCount(0, index);
    }
  }

  private GenericUDFUtils() {
    // prevent instantiation
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy