All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.hive.ql.metadata.HiveStorageHandler Maven / Gradle / Ivy

There is a newer version: 4.0.0
Show newest version
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.metadata;

import java.util.Map;

import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.hive.metastore.HiveMetaHook;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputFormat;

/**
 * HiveStorageHandler defines a pluggable interface for adding
 * new storage handlers to Hive.  A storage handler consists of
 * a bundle of the following:
 *
 *
    *
  • input format *
  • output format *
  • serde *
  • metadata hooks for keeping an external catalog in sync * with Hive's metastore *
  • rules for setting up the configuration properties on * map/reduce jobs which access tables stored by this handler *
* * Storage handler classes are plugged in using the STORED BY 'classname' * clause in CREATE TABLE. */ public interface HiveStorageHandler extends Configurable { /** * @return Class providing an implementation of {@link InputFormat} */ public Class getInputFormatClass(); /** * @return Class providing an implementation of {@link OutputFormat} */ public Class getOutputFormatClass(); /** * @return Class providing an implementation of {@link SerDe} */ public Class getSerDeClass(); /** * @return metadata hook implementation, or null if this * storage handler does not need any metadata notifications */ public HiveMetaHook getMetaHook(); /** * Returns the implementation specific authorization provider * * @return authorization provider * @throws HiveException */ public HiveAuthorizationProvider getAuthorizationProvider() throws HiveException; /** * This method is called to allow the StorageHandlers the chance * to populate the JobContext.getConfiguration() with properties that * maybe be needed by the handler's bundled artifacts (ie InputFormat, SerDe, etc). * Key value pairs passed into jobProperties are guaranteed to be set in the job's * configuration object. User's can retrieve "context" information from tableDesc. * User's should avoid mutating tableDesc and only make changes in jobProperties. * This method is expected to be idempotent such that a job called with the * same tableDesc values should return the same key-value pairs in jobProperties. * Any external state set by this method should remain the same if this method is * called again. It is up to the user to determine how best guarantee this invariant. * * This method in particular is to create a configuration for input. * @param tableDesc descriptor for the table being accessed * @param jobProperties receives properties copied or transformed * from the table properties */ public abstract void configureInputJobProperties(TableDesc tableDesc, Map jobProperties); /** * This method is called to allow the StorageHandlers the chance * to populate the JobContext.getConfiguration() with properties that * maybe be needed by the handler's bundled artifacts (ie InputFormat, SerDe, etc). * Key value pairs passed into jobProperties are guaranteed to be set in the job's * configuration object. User's can retrieve "context" information from tableDesc. * User's should avoid mutating tableDesc and only make changes in jobProperties. * This method is expected to be idempotent such that a job called with the * same tableDesc values should return the same key-value pairs in jobProperties. * Any external state set by this method should remain the same if this method is * called again. It is up to the user to determine how best guarantee this invariant. * * This method in particular is to create a configuration for output. * @param tableDesc descriptor for the table being accessed * @param jobProperties receives properties copied or transformed * from the table properties */ public abstract void configureOutputJobProperties(TableDesc tableDesc, Map jobProperties); /** * Deprecated use configureInputJobProperties/configureOutputJobProperties * methods instead. * * Configures properties for a job based on the definition of the * source or target table it accesses. * * @param tableDesc descriptor for the table being accessed * * @param jobProperties receives properties copied or transformed * from the table properties */ @Deprecated public void configureTableJobProperties( TableDesc tableDesc, Map jobProperties); /** * Called just before submitting MapReduce job. * * @param tableDesc descriptor for the table being accessed * @param JobConf jobConf for MapReduce job */ public void configureJobConf(TableDesc tableDesc, JobConf jobConf); }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy