All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils Maven / Gradle / Ivy

Go to download

Hive is a data warehouse infrastructure built on top of Hadoop; see http://wiki.apache.org/hadoop/Hive

The newest version!
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.udf.generic;

import java.lang.reflect.Array;
import java.lang.reflect.Method;
import java.lang.reflect.ParameterizedType;
import java.lang.reflect.Type;
import java.nio.ByteBuffer;
import java.util.HashMap;

import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.IdentityConverter;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.VoidObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.Text;

/**
 * Util functions for GenericUDF classes.
 */
public final class GenericUDFUtils {
  /**
   * Tells whether the given byte can begin a UTF-8 encoded character.
   *
   * UTF-8 continuation bytes carry the bit pattern 10xxxxxx; any byte whose
   * two most significant bits differ from that pattern is reported as a
   * start byte.
   *
   * @param b the byte to examine
   * @return true unless b is a UTF-8 continuation byte
   */
  public static boolean isUtfStartByte(byte b) {
    // Keep only the two most significant bits of the byte.
    final int topTwoBits = b & 0xC0;
    final boolean isContinuation = (topTwoBits == 0x80);
    return !isContinuation;
  }

  /**
   * This class helps to find the return ObjectInspector for a GenericUDF.
   *
   * In many cases like CASE and IF, the GenericUDF is returning a value out of
   * several possibilities. However these possibilities may not always have the
   * same ObjectInspector.
   *
   * This class will help detect whether all possibilities have exactly the same
   * ObjectInspector. If not, then we need to convert the Objects to the same
   * ObjectInspector.
   *
   * A special case is when some values are constant NULL (their inspector is
   * a VoidObjectInspector). Such values are ignored when resolving the return
   * ObjectInspector.
   */
  public static class ReturnObjectInspectorResolver {

    boolean allowTypeConversion;
    ObjectInspector returnObjectInspector;

    // We create converters beforehand, so that the converters can reuse the
    // same object for returning conversion results. Keyed by the source
    // ObjectInspector; created lazily on the first real conversion.
    HashMap<ObjectInspector, Converter> converters;

    /**
     * Creates a resolver that does not allow type conversion.
     */
    public ReturnObjectInspectorResolver() {
      this(false);
    }

    /**
     * Creates a resolver.
     *
     * @param allowTypeConversion
     *          whether differing types may be reconciled through a common
     *          type obtained from FunctionRegistry.getCommonClass
     */
    public ReturnObjectInspectorResolver(boolean allowTypeConversion) {
      this.allowTypeConversion = allowTypeConversion;
    }

    /**
     * Update returnObjectInspector based on the ObjectInspector seen.
     *
     * @param oi
     *          the ObjectInspector of one more possible return value
     * @return false if there is a type mismatch
     * @throws UDFArgumentTypeException
     *           declared for callers; nothing in this method throws it
     *           directly
     */
    public boolean update(ObjectInspector oi) throws UDFArgumentTypeException {
      if (oi instanceof VoidObjectInspector) {
        // Constant NULL: compatible with whatever the other values are.
        return true;
      }

      if (returnObjectInspector == null) {
        // The first argument, just set the return to be the standard
        // writable version of this OI.
        returnObjectInspector = ObjectInspectorUtils
            .getStandardObjectInspector(oi,
            ObjectInspectorCopyOption.WRITABLE);
        return true;
      }

      if (returnObjectInspector == oi) {
        // The new ObjectInspector is the same as the old one, directly return
        // true
        return true;
      }

      TypeInfo oiTypeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(oi);
      TypeInfo rTypeInfo = TypeInfoUtils
          .getTypeInfoFromObjectInspector(returnObjectInspector);
      // Compare by value as well as by identity, so that two equal TypeInfo
      // instances are never reported as a type mismatch.
      if (oiTypeInfo == rTypeInfo
          || (oiTypeInfo != null && oiTypeInfo.equals(rTypeInfo))) {
        // Convert everything to writable, if types of arguments are the same,
        // but ObjectInspectors are different.
        returnObjectInspector = ObjectInspectorUtils
            .getStandardObjectInspector(returnObjectInspector,
            ObjectInspectorCopyOption.WRITABLE);
        return true;
      }

      if (!allowTypeConversion) {
        return false;
      }

      // Types are different, we need to check whether we can convert them to
      // a common base class or not.
      TypeInfo commonTypeInfo = FunctionRegistry.getCommonClass(oiTypeInfo,
          rTypeInfo);
      if (commonTypeInfo == null) {
        return false;
      }

      returnObjectInspector = TypeInfoUtils
          .getStandardWritableObjectInspectorFromTypeInfo(commonTypeInfo);

      return true;
    }

    /**
     * Returns the ObjectInspector of the return value. This is null until
     * update() has seen an ObjectInspector that is not a VoidObjectInspector.
     */
    public ObjectInspector get() {
      return returnObjectInspector;
    }

    /**
     * Convert the return Object if necessary (when the ObjectInspectors of
     * different possibilities are not all the same).
     *
     * @param o
     *          the value to return
     * @param oi
     *          the ObjectInspector describing o
     * @return o itself when oi is the resolved return ObjectInspector, null
     *         when o is null, otherwise the result of converting o
     */
    public Object convertIfNecessary(Object o, ObjectInspector oi) {
      if (oi == returnObjectInspector) {
        // Already in the resolved representation.
        return o;
      }

      if (o == null) {
        return null;
      }

      if (converters == null) {
        converters = new HashMap<ObjectInspector, Converter>();
      }

      // One converter per source ObjectInspector, created on first use.
      Converter converter = converters.get(oi);
      if (converter == null) {
        converter = ObjectInspectorConverters.getConverter(oi,
            returnObjectInspector);
        converters.put(oi, converter);
      }
      return converter.convert(o);
    }

  }

  /**
   * Convert parameters for the method if needed.
   *
   * An instance is built for one Method and one array of input
   * ObjectInspectors. It prepares a converter per input parameter and reuses
   * the same output arrays on every call to convertIfNecessary.
   */
  public static class ConversionHelper {

    private final ObjectInspector[] givenParameterOIs;
    Type[] methodParameterTypes;
    private final boolean isVariableLengthArgument;
    Type lastParaElementType;

    boolean conversionNeeded;
    Converter[] converters;
    Object[] convertedParameters;
    Object[] convertedParametersInArray;

    /**
     * Returns the Class behind a Type: the Type itself when it is a Class,
     * the raw type when it is a ParameterizedType, and null otherwise.
     */
    private static Class<?> getClassFromType(Type t) {
      if (t instanceof Class) {
        return (Class<?>) t;
      }
      if (t instanceof ParameterizedType) {
        return (Class<?>) ((ParameterizedType) t).getRawType();
      }
      return null;
    }

    /**
     * Picks the ObjectInspector the method expects for one declared
     * parameter type.
     */
    private static ObjectInspector getMethodParameterOI(Type declaredType,
        ObjectInspector givenOI) {
      // This method takes Object, so it accepts whatever types that are
      // passed in.
      if (declaredType == Object.class) {
        return ObjectInspectorUtils.getStandardObjectInspector(givenOI,
            ObjectInspectorCopyOption.JAVA);
      }
      return ObjectInspectorFactory.getReflectionObjectInspector(
          declaredType, ObjectInspectorOptions.JAVA);
    }

    /**
     * Create a ConversionHelper for Method m. The ObjectInspectors of the
     * input parameters are specified in parameterOIs.
     *
     * @param m
     *          the method whose parameters will be supplied
     * @param parameterOIs
     *          the ObjectInspectors of the values that will be passed in
     * @throws UDFArgumentException
     *           a UDFArgumentLengthException is thrown when the number of
     *           parameterOIs does not fit the parameter list of m
     */
    public ConversionHelper(Method m, ObjectInspector[] parameterOIs)
        throws UDFArgumentException {
      givenParameterOIs = parameterOIs;

      methodParameterTypes = m.getGenericParameterTypes();
      final int declaredCount = methodParameterTypes.length;

      // Whether the method takes an array like Object[],
      // or String[] etc in the last argument.
      lastParaElementType = TypeInfoUtils
          .getArrayElementType(declaredCount == 0 ? null
          : methodParameterTypes[declaredCount - 1]);
      isVariableLengthArgument = (lastParaElementType != null);

      // Create the output OI array
      ObjectInspector[] methodParameterOIs = new ObjectInspector[parameterOIs.length];

      if (isVariableLengthArgument) {

        // ConversionHelper can be called without method parameter length
        // checkings
        // for terminatePartial() and merge() calls.
        if (parameterOIs.length < declaredCount - 1) {
          throw new UDFArgumentLengthException(m.toString()
              + " requires at least " + (declaredCount - 1)
              + " arguments but only " + parameterOIs.length
              + " are passed in.");
        }
        // Resolve the first declaredCount - 1 (non-array) entries
        for (int i = 0; i < declaredCount - 1; i++) {
          methodParameterOIs[i] = getMethodParameterOI(
              methodParameterTypes[i], parameterOIs[i]);
        }

        // Deal with the last entry
        if (lastParaElementType == Object.class) {
          // This method takes Object[], so it accepts whatever types that are
          // passed in.
          for (int i = declaredCount - 1; i < parameterOIs.length; i++) {
            methodParameterOIs[i] = ObjectInspectorUtils
                .getStandardObjectInspector(parameterOIs[i],
                ObjectInspectorCopyOption.JAVA);
          }
        } else {
          // This method takes something like String[], so it only accepts
          // something like String. One inspector is shared by all trailing
          // parameters.
          ObjectInspector oi = ObjectInspectorFactory
              .getReflectionObjectInspector(lastParaElementType,
              ObjectInspectorOptions.JAVA);
          for (int i = declaredCount - 1; i < parameterOIs.length; i++) {
            methodParameterOIs[i] = oi;
          }
        }

      } else {

        // Normal case, the last parameter is a normal parameter.
        // ConversionHelper can be called without method parameter length
        // checkings
        // for terminatePartial() and merge() calls.
        if (declaredCount != parameterOIs.length) {
          throw new UDFArgumentLengthException(m.toString() + " requires "
              + declaredCount + " arguments but "
              + parameterOIs.length + " are passed in.");
        }
        for (int i = 0; i < declaredCount; i++) {
          methodParameterOIs[i] = getMethodParameterOI(
              methodParameterTypes[i], parameterOIs[i]);
        }
      }

      // Create the converters
      conversionNeeded = false;
      converters = new Converter[parameterOIs.length];
      for (int i = 0; i < parameterOIs.length; i++) {
        Converter pc = ObjectInspectorConverters.getConverter(parameterOIs[i],
            methodParameterOIs[i]);
        converters[i] = pc;
        // Conversion is needed as soon as one converter is not the identity.
        conversionNeeded = conversionNeeded
            || (!(pc instanceof IdentityConverter));
      }

      if (isVariableLengthArgument) {
        // The last slot holds a typed array that collects every trailing
        // parameter; both arrays are allocated once and reused per call.
        convertedParameters = new Object[declaredCount];
        convertedParametersInArray = (Object[]) Array.newInstance(
            getClassFromType(lastParaElementType), parameterOIs.length
            - declaredCount + 1);
        convertedParameters[convertedParameters.length - 1] = convertedParametersInArray;
      } else {
        convertedParameters = new Object[parameterOIs.length];
      }
    }

    /**
     * Converts the given parameters into the form expected by the method.
     *
     * @param parameters
     *          the raw parameter values, one per ObjectInspector given to the
     *          constructor
     * @return the parameters array itself when no conversion is needed and
     *         the method has no array-typed last parameter; otherwise the
     *         internal array, which is reused (and overwritten) by every call
     */
    public Object[] convertIfNecessary(Object... parameters) {

      assert (parameters.length == givenParameterOIs.length);

      if (!conversionNeeded && !isVariableLengthArgument) {
        // no conversion needed, and not variable-length argument:
        // just return what is passed in.
        return parameters;
      }

      if (isVariableLengthArgument) {
        final int fixedCount = methodParameterTypes.length - 1;
        // convert the first methodParameterTypes.length - 1 entries
        for (int i = 0; i < fixedCount; i++) {
          convertedParameters[i] = converters[i].convert(parameters[i]);
        }
        // convert the rest and put into the last entry
        for (int i = fixedCount; i < parameters.length; i++) {
          convertedParametersInArray[i - fixedCount] = converters[i]
              .convert(parameters[i]);
        }
      } else {
        // normal case, convert all parameters
        for (int i = 0; i < methodParameterTypes.length; i++) {
          convertedParameters[i] = converters[i].convert(parameters[i]);
        }
      }
      return convertedParameters;
    }
  }

  /**
   * Return an ordinal from an integer, e.g. "1st", "2nd", "3rd", "4th".
   *
   * Numbers whose last two digits are 11, 12 or 13 always take the "th"
   * suffix ("11th", "112th", "213th"), whatever their last digit is.
   *
   * @param i
   *          the number to format
   * @return the number followed by its English ordinal suffix, or the empty
   *         string when i is zero or negative
   */
  public static String getOrdinal(int i) {
    if (i <= 0) {
      return "";
    }

    // The "teens" are irregular in every hundred, not only for 11, 12, 13.
    int lastTwoDigits = i % 100;
    if (lastTwoDigits >= 11 && lastTwoDigits <= 13) {
      return i + "th";
    }

    switch (i % 10) {
    case 1:
      return i + "st";
    case 2:
      return i + "nd";
    case 3:
      return i + "rd";
    default:
      return i + "th";
    }
  }

  /**
   * Finds the first occurrence of subtext in text at or after the byte offset
   * start, working directly on the backing byte arrays to avoid string
   * encoding and decoding. Modeled on
   * {@link org.apache.hadoop.io.Text#find(String, int)}.
   *
   * @param text
   *          the text to search in
   * @param subtext
   *          the text to search for
   * @param start
   *          the byte offset in text at which the search begins
   * @return the byte offset of the first match; start itself when subtext is
   *         empty; -1 when there is no match or when start is negative or
   *         greater than the length of text
   */
  public static int findText(Text text, Text subtext, int start) {
    final int textLength = text.getLength();
    final int subtextLength = subtext.getLength();

    // An offset outside [0, textLength] cannot be the start of any match.
    if (start < 0 || start > textLength) {
      return -1;
    }

    // An empty pattern matches immediately at the starting offset.
    if (subtextLength == 0) {
      return start;
    }

    // Only the first getLength() bytes of each backing array are valid.
    final byte[] haystack = text.getBytes();
    final byte[] needle = subtext.getBytes();

    // Last offset at which a complete match still fits inside text.
    final int lastCandidate = textLength - subtextLength;
    for (int pos = start; pos <= lastCandidate; pos++) {
      int matched = 0;
      while (matched < subtextLength
          && haystack[pos + matched] == needle[matched]) {
        matched++;
      }
      if (matched == subtextLength) {
        return pos;
      }
    }
    return -1; // not found
  }

  /**
   * Private constructor: this class only offers static methods and nested
   * classes, so it is not meant to be instantiated.
   */
  private GenericUDFUtils() {
    // prevent instantiation
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy