All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.gobblin.hive.HiveSerDeManager Maven / Gradle / Ivy

There is a newer version: 0.17.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.gobblin.hive;

import java.io.IOException;
import java.util.Locale;

import org.apache.commons.lang3.reflect.ConstructorUtils;
import org.apache.hadoop.fs.Path;

import com.google.common.base.Enums;
import com.google.common.base.Optional;

import org.apache.gobblin.annotation.Alpha;
import org.apache.gobblin.configuration.State;
import org.apache.gobblin.hive.avro.HiveAvroSerDeManager;


/**
 * This class manages SerDe properties (including schema properties) for Hive registration.
 *
 * <p>Concrete implementations are obtained through {@link #get(State)}, which instantiates them reflectively
 * via a constructor that accepts a {@link State}.
 *
 * @author Ziyang Liu
 */
@Alpha
public abstract class HiveSerDeManager {

  /** Name of the property that selects which {@link HiveSerDeManager} implementation {@link #get(State)} creates. */
  public static final String HIVE_ROW_FORMAT = "hive.row.format";

  /** The configuration this manager was constructed with. */
  protected final State props;

  /**
   * @param props the {@link State} kept by this manager and made available to subclasses.
   */
  protected HiveSerDeManager(State props) {
    this.props = props;
  }

  /**
   * Add the appropriate SerDe properties (including schema properties) to the given {@link HiveRegistrationUnit}.
   *
   * @param path The {@link Path} from where the schema should be obtained.
   * @param hiveUnit The {@link HiveRegistrationUnit} where the serde properties should be added to.
   * @throws IOException
   */
  public abstract void addSerDeProperties(Path path, HiveRegistrationUnit hiveUnit) throws IOException;

  /**
   * Add the appropriate SerDe properties (including schema properties) to the target {@link HiveRegistrationUnit}
   * using the SerDe properties from the source {@link HiveRegistrationUnit}.
   *
   * <p>
   * A benefit of doing this is to avoid obtaining the schema multiple times when creating a table and a partition
   * with the same schema, or creating several tables and partitions with the same schema. After the first
   * table/partition is created, one can use the same SerDe properties to create the other tables/partitions.
   * </p>
   *
   * @param source The {@link HiveRegistrationUnit} whose SerDe properties are read.
   * @param target The {@link HiveRegistrationUnit} where the SerDe properties should be added to.
   * @throws IOException
   */
  public abstract void addSerDeProperties(HiveRegistrationUnit source, HiveRegistrationUnit target)
      throws IOException;

  /**
   * Update the schema in the existing {@link HiveRegistrationUnit} into the schema in the new
   * {@link HiveRegistrationUnit}.
   *
   * @param existingUnit The {@link HiveRegistrationUnit} that already exists.
   * @param newUnit The {@link HiveRegistrationUnit} carrying the new schema.
   * @throws IOException
   */
  public abstract void updateSchema(HiveRegistrationUnit existingUnit, HiveRegistrationUnit newUnit)
      throws IOException;

  /**
   * Whether two {@link HiveRegistrationUnit} have the same schema.
   *
   * @param unit1 The first {@link HiveRegistrationUnit} to compare.
   * @param unit2 The second {@link HiveRegistrationUnit} to compare.
   * @return {@code true} if both units have the same schema.
   * @throws IOException
   */
  public abstract boolean haveSameSchema(HiveRegistrationUnit unit1, HiveRegistrationUnit unit2)
      throws IOException;

  /**
   * Built-in {@link HiveSerDeManager} implementations that can be selected by name through
   * {@link #HIVE_ROW_FORMAT}. {@link #toString()} returns the fully qualified name of the implementing class.
   */
  public enum Implementation {
    AVRO(HiveAvroSerDeManager.class.getName());

    private final String schemaManagerClassName;

    Implementation(String schemaManagerClassName) {
      this.schemaManagerClassName = schemaManagerClassName;
    }

    /**
     * @return the class name of the {@link HiveSerDeManager} backing this constant (not the constant's name).
     */
    @Override
    public String toString() {
      return this.schemaManagerClassName;
    }
  }

  /**
   * Get an instance of {@link HiveSerDeManager}.
   *
   * @param props A {@link State} object. To get a specific implementation of {@link HiveSerDeManager}, specify either
   *          one of the values in {@link Implementation} (e.g., AVRO, matched case-insensitively) or the name of a
   *          class that implements {@link HiveSerDeManager} in property {@link #HIVE_ROW_FORMAT};
   *          {@code AVRO} is supplied as the default value when reading that property. The selected class must have
   *          a constructor that takes a {@link State} object, and this {@link State} object is passed to it.
   * @return the newly created {@link HiveSerDeManager}.
   * @throws RuntimeException if the selected class cannot be loaded or instantiated reflectively.
   */
  public static HiveSerDeManager get(State props) {
    String type = props.getProp(HIVE_ROW_FORMAT, Implementation.AVRO.name());

    // Locale.ROOT keeps the enum lookup independent of the JVM default locale (e.g. Turkish dotted/dotless "i").
    Optional<Implementation> implementation =
        Enums.getIfPresent(Implementation.class, type.toUpperCase(Locale.ROOT));

    // A known enum name maps to its backing class; anything else is treated as a class name as-is.
    String className = implementation.isPresent() ? implementation.get().toString() : type;

    try {
      return (HiveSerDeManager) ConstructorUtils.invokeConstructor(Class.forName(className), props);
    } catch (ReflectiveOperationException e) {
      throw new RuntimeException(
          "Unable to instantiate " + HiveSerDeManager.class.getSimpleName() + " with type " + type, e);
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy