All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.hive.serde2.avro.SchemaToTypeInfo Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.serde2.avro;

import org.apache.avro.Schema;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Hashtable;
import java.util.List;
import java.util.Map;

import static org.apache.avro.Schema.Type.BOOLEAN;
import static org.apache.avro.Schema.Type.DOUBLE;
import static org.apache.avro.Schema.Type.FLOAT;
import static org.apache.avro.Schema.Type.INT;
import static org.apache.avro.Schema.Type.LONG;
import static org.apache.avro.Schema.Type.NULL;
import static org.apache.avro.Schema.Type.STRING;

/**
 * Convert an Avro Schema to a Hive TypeInfo
 */
class SchemaToTypeInfo {
  // Conversion of Avro primitive types to Hive primitive types
  // Avro             Hive
  // Null
  // boolean          boolean    check
  // int              int        check
  // long             bigint     check
  // float            double     check
  // double           double     check
  // bytes
  // string           string     check
  //                  tinyint
  //                  smallint

  // Map of Avro's primitive types to Hives (for those that are supported by both)
  private static final Map primitiveTypeToTypeInfo = initTypeMap();
  private static Map initTypeMap() {
    Map theMap = new Hashtable();
    theMap.put(STRING, TypeInfoFactory.getPrimitiveTypeInfo("string"));
    theMap.put(INT, TypeInfoFactory.getPrimitiveTypeInfo("int"));
    theMap.put(BOOLEAN, TypeInfoFactory.getPrimitiveTypeInfo("boolean"));
    theMap.put(LONG, TypeInfoFactory.getPrimitiveTypeInfo("bigint"));
    theMap.put(FLOAT, TypeInfoFactory.getPrimitiveTypeInfo("float"));
    theMap.put(DOUBLE, TypeInfoFactory.getPrimitiveTypeInfo("double"));
    theMap.put(NULL, TypeInfoFactory.getPrimitiveTypeInfo("void"));
    return Collections.unmodifiableMap(theMap);
  }

  /**
   * Generate a list of of TypeInfos from an Avro schema.  This method is
   * currently public due to some weirdness in deserializing unions, but
   * will be made private once that is resolved.
   * @param schema Schema to generate field types for
   * @return List of TypeInfos, each element of which is a TypeInfo derived
   *         from the schema.
   * @throws AvroSerdeException for problems during conversion.
   */
  public static List generateColumnTypes(Schema schema) throws AvroSerdeException {
    List fields = schema.getFields();

    List types = new ArrayList(fields.size());

    for (Schema.Field field : fields) {
      types.add(generateTypeInfo(field.schema()));
    }

    return types;
  }

  static InstanceCache typeInfoCache = new InstanceCache() {
                                  @Override
                                  protected TypeInfo makeInstance(Schema s) throws AvroSerdeException {
                                    return generateTypeInfoWorker(s);
                                  }
                                };
  /**
   * Convert an Avro Schema into an equivalent Hive TypeInfo.
   * @param schema to record. Must be of record type.
   * @return TypeInfo matching the Avro schema
   * @throws AvroSerdeException for any problems during conversion.
   */
  public static TypeInfo generateTypeInfo(Schema schema) throws AvroSerdeException {
    return typeInfoCache.retrieve(schema);
  }

  private static TypeInfo generateTypeInfoWorker(Schema schema) throws AvroSerdeException {
    // Avro requires NULLable types to be defined as unions of some type T
    // and NULL.  This is annoying and we're going to hide it from the user.
    if(AvroSerdeUtils.isNullableType(schema))
      return generateTypeInfo(AvroSerdeUtils.getOtherTypeFromNullableType(schema));

    Schema.Type type = schema.getType();

    if(primitiveTypeToTypeInfo.containsKey(type))
      return primitiveTypeToTypeInfo.get(type);

    switch(type) {
      case BYTES:  return generateBytesTypeInfo(schema);
      case RECORD: return generateRecordTypeInfo(schema);
      case MAP:    return generateMapTypeInfo(schema);
      case ARRAY:  return generateArrayTypeInfo(schema);
      case UNION:  return generateUnionTypeInfo(schema);
      case ENUM:   return generateEnumTypeInfo(schema);
      case FIXED:  return generateFixedTypeInfo(schema);
      default:     throw new AvroSerdeException("Do not yet support: " + schema);
    }
  }

  private static TypeInfo generateRecordTypeInfo(Schema schema) throws AvroSerdeException {
    assert schema.getType().equals(Schema.Type.RECORD);

    List fields = schema.getFields();
    List fieldNames = new ArrayList(fields.size());
    List typeInfos = new ArrayList(fields.size());

    for(int i = 0; i < fields.size(); i++) {
      fieldNames.add(i, fields.get(i).name());
      typeInfos.add(i, generateTypeInfo(fields.get(i).schema()));
    }

    return TypeInfoFactory.getStructTypeInfo(fieldNames, typeInfos);
  }

  /**
   * Generate a TypeInfo for an Avro Map.  This is made slightly simpler in that
   * Avro only allows maps with strings for keys.
   */
  private static TypeInfo generateMapTypeInfo(Schema schema) throws AvroSerdeException {
    assert schema.getType().equals(Schema.Type.MAP);
    Schema valueType = schema.getValueType();
    TypeInfo ti = generateTypeInfo(valueType);

    return TypeInfoFactory.getMapTypeInfo(TypeInfoFactory.getPrimitiveTypeInfo("string"), ti);
  }

  private static TypeInfo generateArrayTypeInfo(Schema schema) throws AvroSerdeException {
    assert schema.getType().equals(Schema.Type.ARRAY);
    Schema itemsType = schema.getElementType();
    TypeInfo itemsTypeInfo = generateTypeInfo(itemsType);

    return TypeInfoFactory.getListTypeInfo(itemsTypeInfo);
  }

  private static TypeInfo generateUnionTypeInfo(Schema schema) throws AvroSerdeException {
    assert schema.getType().equals(Schema.Type.UNION);
    List types = schema.getTypes();


    List typeInfos = new ArrayList(types.size());

    for(Schema type : types) {
      typeInfos.add(generateTypeInfo(type));
    }

    return TypeInfoFactory.getUnionTypeInfo(typeInfos);
  }

  // Hive doesn't have an Enum type, so we're going to treat them as Strings.
  // During the deserialize/serialize stage we'll check for enumness and
  // convert as such.
  private static TypeInfo generateEnumTypeInfo(Schema schema) {
    assert schema.getType().equals(Schema.Type.ENUM);

    return TypeInfoFactory.getPrimitiveTypeInfo("string");
  }

  // Hive doesn't have a Fixed type, so we're going to treat them as arrays of
  // bytes
  // TODO: Make note in documentation that Hive sends these out as signed bytes.
  private static final TypeInfo FIXED_AND_BYTES_EQUIV =
          TypeInfoFactory.getListTypeInfo(TypeInfoFactory.byteTypeInfo);
  private static TypeInfo generateFixedTypeInfo(Schema schema) {
    assert schema.getType().equals(Schema.Type.FIXED);

    return FIXED_AND_BYTES_EQUIV;
  }

  // Avro considers bytes to be a primitive type, but Hive doesn't.  We'll
  // convert them to a list of bytes, just like Fixed.  Sigh.
  private static TypeInfo generateBytesTypeInfo(Schema schema) {
    assert schema.getType().equals(Schema.Type.BYTES);
    return FIXED_AND_BYTES_EQUIV;
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy