All downloads are free. The search and download functionality uses the official Maven repository.

co.cask.hydrator.plugin.batch.commons.HiveSchemaConverter Maven / Gradle / Ivy

There is a newer version: 2.1.2
Show newest version
/*
 * Copyright © 2015-2016 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package co.cask.hydrator.plugin.batch.commons;

import co.cask.cdap.api.data.schema.Schema;
import com.google.common.base.Preconditions;
import com.google.common.base.Throwables;
import com.google.common.collect.Lists;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hive.hcatalog.common.HCatException;
import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchema;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.List;

/**
 * 

>Hive Schema Converter class to convert {@link Schema} to {@link HCatSchema} and vice versa.

* Note: If the {@link HCatSchema} contains non-primitive type then this conversion to {@link Schema} will fail. */ public class HiveSchemaConverter { private static final Logger LOG = LoggerFactory.getLogger(HiveSchemaConverter.class); /** * Converts a CDAP's {@link Schema} to Hive's {@link HCatSchema} while verifying the fields in the given * {@link Schema} to exists in the table. The valid types for {@link Schema} which can be converted into * {@link HCatSchema} are boolean, int, long, float, double, string and bytes. * * @param schema the {@link Schema} * @return {@link HCatSchema} for the given {@link Schema} * @throws NullPointerException if a field in the given {@link Schema} is not found in table's {@link HCatSchema} */ public static HCatSchema toHiveSchema(Schema schema, HCatSchema tableSchema) { List fields = Lists.newArrayList(); for (Schema.Field field : schema.getFields()) { String name = field.getName(); try { // this field of the schema must exist in the table and should be of the same type HCatFieldSchema hCatFieldSchema = tableSchema.get(name); Preconditions.checkNotNull(hCatFieldSchema, "Missing field %s in table schema", name); PrimitiveTypeInfo hiveType = hCatFieldSchema.getTypeInfo(); PrimitiveTypeInfo type = getType(name, field.getSchema()); if (!hiveType.equals(type)) { LOG.warn("The given schema {} for the field {} does not match the schema {} from the table. " + "The schema {} for field {} will be used.", type, name, hiveType, hiveType, name); } fields.add(hCatFieldSchema); } catch (HCatException e) { throw new IllegalArgumentException( String.format("Failed to create HCatFieldSchema field %s of type %s from schema", name, field.getSchema().getType()), e); } } return new HCatSchema(fields); } /** * Returns the {@link PrimitiveTypeInfo} for the {@link Schema.Type} * * @param name name of the field * @param schema {@link Schema} of the field * @return {@link PrimitiveTypeInfo} for the given {@link Schema.Type} which is compatible with Hive. 
*/ private static PrimitiveTypeInfo getType(String name, Schema schema) { Schema.Type type = schema.isNullable() ? schema.getNonNullable().getType() : schema.getType(); switch (type) { case BOOLEAN: return TypeInfoFactory.booleanTypeInfo; case INT: return TypeInfoFactory.intTypeInfo; case LONG: return TypeInfoFactory.longTypeInfo; case FLOAT: return TypeInfoFactory.floatTypeInfo; case DOUBLE: return TypeInfoFactory.doubleTypeInfo; case STRING: return TypeInfoFactory.stringTypeInfo; case BYTES: return TypeInfoFactory.binaryTypeInfo; default: throw new IllegalArgumentException(String.format( "Schema contains field '%s' with unsupported type %s. " + "You should provide an schema with this field dropped to work with this table.", name, type)); } } /** *

Converts a {@link HCatSchema} from hive to {@link Schema} for CDAP.

*

Note: This conversion does not support non-primitive types and the conversion will fail. * The conversion might also change the primitive type. * See {@link #getType(String, PrimitiveObjectInspector.PrimitiveCategory)} for details.

* The valid types of {@link HCatFieldSchema} which can be converted into {@link Schema} are boolean, byte, char, * short, int, long, float, double, string, varchar, binary * * @param hiveSchema the {@link HCatSchema} of the hive table * @return {@link Schema} for the given {@link HCatSchema} */ public static Schema toSchema(HCatSchema hiveSchema) { List fields = Lists.newArrayList(); for (HCatFieldSchema field : hiveSchema.getFields()) { String name = field.getName(); if (field.isComplex()) { throw new IllegalArgumentException(String.format( "Table schema contains field '%s' with complex type %s. Only primitive types are supported.", name, field.getTypeString())); } fields.add(Schema.Field.of(name, getType(name, field.getTypeInfo().getPrimitiveCategory()))); } return Schema.recordOf("record", fields); } /** * Returns the {@link Schema.Type} compatible for this field from hive. * * @param name name of the field * @param category the field's {@link PrimitiveObjectInspector.PrimitiveCategory} * @return the {@link Schema.Type} for this field */ private static Schema getType(String name, PrimitiveObjectInspector.PrimitiveCategory category) { System.out.println("### primitive cat " + category); switch (category) { case BOOLEAN: return Schema.nullableOf(Schema.of(Schema.Type.BOOLEAN)); case BYTE: case SHORT: case INT: return Schema.nullableOf(Schema.of(Schema.Type.INT)); case LONG: return Schema.nullableOf(Schema.of(Schema.Type.LONG)); case FLOAT: return Schema.nullableOf(Schema.of(Schema.Type.FLOAT)); case DOUBLE: return Schema.nullableOf(Schema.of(Schema.Type.DOUBLE)); case CHAR: case STRING: case VARCHAR: return Schema.nullableOf(Schema.of(Schema.Type.STRING)); case BINARY: return Schema.nullableOf(Schema.of(Schema.Type.BYTES)); // We can support VOID by having Schema type as null but HCatRecord does not support VOID and since we read // write through HCatSchema and HCatRecord we are not supporting VOID too for consistent behavior. 
case VOID: case DATE: case TIMESTAMP: case DECIMAL: case UNKNOWN: default: throw new IllegalArgumentException(String.format("Table schema contains field '%s' with unsupported type %s", name, category.name())); } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy