org.apache.hadoop.hive.ql.metadata.HiveStorageHandler Maven / Gradle / Ivy
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.metadata;
import java.util.Map;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.hive.metastore.HiveMetaHook;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputFormat;
/**
* HiveStorageHandler defines a pluggable interface for adding
* new storage handlers to Hive. A storage handler consists of
* a bundle of the following:
*
*
*- input format
*
- output format
*
- serde
*
- metadata hooks for keeping an external catalog in sync
* with Hive's metastore
*
- rules for setting up the configuration properties on
* map/reduce jobs which access tables stored by this handler
*
*
* Storage handler classes are plugged in using the STORED BY 'classname'
* clause in CREATE TABLE.
*/
public interface HiveStorageHandler extends Configurable {
/**
* @return Class providing an implementation of {@link InputFormat}
*/
public Class extends InputFormat> getInputFormatClass();
/**
* @return Class providing an implementation of {@link OutputFormat}
*/
public Class extends OutputFormat> getOutputFormatClass();
/**
* @return Class providing an implementation of {@link SerDe}
*/
public Class extends SerDe> getSerDeClass();
/**
* @return metadata hook implementation, or null if this
* storage handler does not need any metadata notifications
*/
public HiveMetaHook getMetaHook();
/**
* Returns the implementation specific authorization provider
*
* @return authorization provider
* @throws HiveException
*/
public HiveAuthorizationProvider getAuthorizationProvider()
throws HiveException;
/**
* This method is called to allow the StorageHandlers the chance
* to populate the JobContext.getConfiguration() with properties that
* maybe be needed by the handler's bundled artifacts (ie InputFormat, SerDe, etc).
* Key value pairs passed into jobProperties are guaranteed to be set in the job's
* configuration object. User's can retrieve "context" information from tableDesc.
* User's should avoid mutating tableDesc and only make changes in jobProperties.
* This method is expected to be idempotent such that a job called with the
* same tableDesc values should return the same key-value pairs in jobProperties.
* Any external state set by this method should remain the same if this method is
* called again. It is up to the user to determine how best guarantee this invariant.
*
* This method in particular is to create a configuration for input.
* @param tableDesc descriptor for the table being accessed
* @param jobProperties receives properties copied or transformed
* from the table properties
*/
public abstract void configureInputJobProperties(TableDesc tableDesc,
Map jobProperties);
/**
* This method is called to allow the StorageHandlers the chance
* to populate the JobContext.getConfiguration() with properties that
* maybe be needed by the handler's bundled artifacts (ie InputFormat, SerDe, etc).
* Key value pairs passed into jobProperties are guaranteed to be set in the job's
* configuration object. User's can retrieve "context" information from tableDesc.
* User's should avoid mutating tableDesc and only make changes in jobProperties.
* This method is expected to be idempotent such that a job called with the
* same tableDesc values should return the same key-value pairs in jobProperties.
* Any external state set by this method should remain the same if this method is
* called again. It is up to the user to determine how best guarantee this invariant.
*
* This method in particular is to create a configuration for output.
* @param tableDesc descriptor for the table being accessed
* @param jobProperties receives properties copied or transformed
* from the table properties
*/
public abstract void configureOutputJobProperties(TableDesc tableDesc,
Map jobProperties);
/**
* Deprecated use configureInputJobProperties/configureOutputJobProperties
* methods instead.
*
* Configures properties for a job based on the definition of the
* source or target table it accesses.
*
* @param tableDesc descriptor for the table being accessed
*
* @param jobProperties receives properties copied or transformed
* from the table properties
*/
@Deprecated
public void configureTableJobProperties(
TableDesc tableDesc,
Map jobProperties);
/**
* Called just before submitting MapReduce job.
*
* @param tableDesc descriptor for the table being accessed
* @param JobConf jobConf for MapReduce job
*/
public void configureJobConf(TableDesc tableDesc, JobConf jobConf);
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy