All Downloads are FREE. Search and download functionality uses the official Maven repository.

hivemall.hcatalog.common.HCatConstants Maven / Gradle / Ivy

The newest version!
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.hive.hcatalog.common;

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;

/**
 * Holder for HCatalog constants: configuration property names, their default values,
 * class names, and message-bus identifiers. The class is final and has only a private
 * constructor, so it is used purely through its static fields.
 */
public final class HCatConstants {

  /** Private so that this constants holder cannot be instantiated. */
  private HCatConstants() {
  }

  // ---------------------------------------------------------------------------
  // Storage format class names.
  // ---------------------------------------------------------------------------

  /** Class name of the Hive RCFile input format. */
  public static final String HIVE_RCFILE_IF_CLASS =
    "org.apache.hadoop.hive.ql.io.RCFileInputFormat";
  /** Class name of the Hive RCFile output format. */
  public static final String HIVE_RCFILE_OF_CLASS =
    "org.apache.hadoop.hive.ql.io.RCFileOutputFormat";

  /** Class name of {@link SequenceFileInputFormat}, obtained via {@code Class.getName()}. */
  public static final String SEQUENCEFILE_INPUT =
    SequenceFileInputFormat.class.getName();
  /** Class name of {@link SequenceFileOutputFormat}, obtained via {@code Class.getName()}. */
  public static final String SEQUENCEFILE_OUTPUT =
    SequenceFileOutputFormat.class.getName();

  // ---------------------------------------------------------------------------
  // Pig integration keys and their defaults.
  // ---------------------------------------------------------------------------

  public static final String HCAT_PIG_STORAGE_CLASS =
    "org.apache.pig.builtin.PigStorage";

  // Loader-side keys; the derived keys share the HCAT_PIG_LOADER prefix.
  public static final String HCAT_PIG_LOADER = "hcat.pig.loader";
  public static final String HCAT_PIG_LOADER_LOCATION_SET =
    HCAT_PIG_LOADER + ".location.set";
  public static final String HCAT_PIG_LOADER_ARGS =
    HCAT_PIG_LOADER + ".args";

  // Storer-side keys; the derived keys share the HCAT_PIG_STORER prefix.
  public static final String HCAT_PIG_STORER = "hcat.pig.storer";
  public static final String HCAT_PIG_STORER_ARGS =
    HCAT_PIG_STORER + ".args";

  // Delimiter key for Pig arguments and the value used when it is not set.
  public static final String HCAT_PIG_ARGS_DELIMIT =
    "hcat.pig.args.delimiter";
  public static final String HCAT_PIG_ARGS_DELIMIT_DEFAULT = ",";

  public static final String HCAT_PIG_STORER_LOCATION_SET =
    HCAT_PIG_STORER + ".location.set";

  // Names for inner tuples and inner fields, each paired with its default.
  public static final String HCAT_PIG_INNER_TUPLE_NAME =
    "hcat.pig.inner.tuple.name";
  public static final String HCAT_PIG_INNER_TUPLE_NAME_DEFAULT =
    "innertuple";
  public static final String HCAT_PIG_INNER_FIELD_NAME =
    "hcat.pig.inner.field.name";
  public static final String HCAT_PIG_INNER_FIELD_NAME_DEFAULT =
    "innerfield";

  /**
   * {@value} (default: null)
   * If this property is present in the UDFContext of
   * org.apache.hive.hcatalog.pig.HCatStorer, HCatStorer writes to the location given by
   * the property rather than to the default HCatalog location format. See
   * org.apache.hive.hcatalog.pig.HCatStorerWrapper for an example.
   */
  public static final String HCAT_PIG_STORER_EXTERNAL_LOCATION =
    HCAT_PIG_STORER + ".external.location";

  // ---------------------------------------------------------------------------
  // Keys under which information is stored in the job Configuration.
  // ---------------------------------------------------------------------------

  public static final String HCAT_KEY_BASE = "mapreduce.lib.hcat";
  public static final String HCAT_KEY_OUTPUT_SCHEMA =
    HCAT_KEY_BASE + ".output.schema";
  public static final String HCAT_KEY_JOB_INFO =
    HCAT_KEY_BASE + ".job.info";

  // ---------------------------------------------------------------------------
  // HCatalog-specific settings that may be placed in hive-site.xml.
  // ---------------------------------------------------------------------------

  public static final String HCAT_HIVE_CLIENT_EXPIRY_TIME =
    "hcatalog.hive.client.cache.expiry.time";

  /*
   * Tells HCatalog not to cache metastore clients (default: false). Highly parallel
   * HCatalog use cases can set this so that idle connections sitting in the cache do
   * not use up too many metastore connections.
   */
  public static final String HCAT_HIVE_CLIENT_DISABLE_CACHE =
    "hcatalog.hive.client.cache.disabled";

  // ---------------------------------------------------------------------------
  // Table, metastore and permission related keys.
  // ---------------------------------------------------------------------------

  public static final String HCAT_TABLE_SCHEMA = "hcat.table.schema";

  /** Same property name as {@code HiveConf.ConfVars.METASTOREURIS}. */
  public static final String HCAT_METASTORE_URI =
    HiveConf.ConfVars.METASTOREURIS.varname;

  public static final String HCAT_PERMS = "hcat.perms";
  public static final String HCAT_GROUP = "hcat.group";
  public static final String HCAT_CREATE_TBL_NAME = "hcat.create.tbl.name";
  public static final String HCAT_CREATE_DB_NAME = "hcat.create.db.name";

  /** Same property name as {@code HiveConf.ConfVars.METASTORE_KERBEROS_PRINCIPAL}. */
  public static final String HCAT_METASTORE_PRINCIPAL =
    HiveConf.ConfVars.METASTORE_KERBEROS_PRINCIPAL.varname;

  /**
   * Desired number of input splits to produce per partition. With a small number of
   * large input files it helps to cut them into many splits, which increases the
   * parallelism of loading them. The number of input splits can also be controlled
   * with mapred.min.split.size and mapred.max.split.size on hadoop 1.x, or with
   * mapreduce.input.fileinputformat.split.minsize and
   * mapreduce.input.fileinputformat.split.maxsize on hadoop 2.x.
   */
  public static final String HCAT_DESIRED_PARTITION_NUM_SPLITS =
    "hcat.desired.partition.num.splits";

  // ---------------------------------------------------------------------------
  // IMPORTANT: output-side keys stored in the job Configuration.
  //
  // Whenever a key is added here, HCatStorer must be updated as well. HCatStorer
  // inserts these keys into the job configuration in the backend so that setOutput
  // does not have to be called from the backend (which would trigger a metastore
  // call from the map jobs).
  // ---------------------------------------------------------------------------

  public static final String HCAT_KEY_OUTPUT_BASE = "mapreduce.lib.hcatoutput";
  public static final String HCAT_KEY_OUTPUT_INFO =
    HCAT_KEY_OUTPUT_BASE + ".info";
  public static final String HCAT_KEY_HIVE_CONF =
    HCAT_KEY_OUTPUT_BASE + ".hive.conf";
  public static final String HCAT_KEY_TOKEN_SIGNATURE =
    HCAT_KEY_OUTPUT_BASE + ".token.sig";

  // The three output keys declared directly above, collected in one array.
  // NOTE(review): a public array can be modified by any caller; treat as read-only.
  public static final String[] OUTPUT_CONFS_TO_SAVE =
    {HCAT_KEY_OUTPUT_INFO, HCAT_KEY_HIVE_CONF, HCAT_KEY_TOKEN_SIGNATURE};

  // ---------------------------------------------------------------------------
  // Message clean-up and message-bus topic keys.
  // ---------------------------------------------------------------------------

  public static final String HCAT_MSG_CLEAN_FREQ = "hcat.msg.clean.freq";
  public static final String HCAT_MSG_EXPIRY_DURATION =
    "hcat.msg.expiry.duration";

  public static final String HCAT_MSGBUS_TOPIC_NAME =
    "hcat.msgbus.topic.name";
  public static final String HCAT_MSGBUS_TOPIC_NAMING_POLICY =
    "hcat.msgbus.topic.naming.policy";
  public static final String HCAT_MSGBUS_TOPIC_PREFIX =
    "hcat.msgbus.topic.prefix";

  // ---------------------------------------------------------------------------
  // Output id and dynamic-partitioning keys.
  // ---------------------------------------------------------------------------

  public static final String HCAT_OUTPUT_ID_HASH =
    HCAT_KEY_OUTPUT_BASE + ".id";

  public static final String HCAT_DYNAMIC_PTN_JOBID =
    HCAT_KEY_OUTPUT_BASE + ".dynamic.jobid";
  // Compile-time flag, fixed to false in this class.
  public static final boolean HCAT_IS_DYNAMIC_MAX_PTN_CHECK_ENABLED = false;
  public static final String HCAT_DYNAMIC_CUSTOM_PATTERN =
    "hcat.dynamic.partitioning.custom.pattern";

  // ---------------------------------------------------------------------------
  // Message Bus related properties.
  // ---------------------------------------------------------------------------

  public static final String HCAT_DEFAULT_TOPIC_PREFIX = "hcat";
  public static final String HCAT_EVENT = "HCAT_EVENT";

  // Event type identifiers.
  public static final String HCAT_ADD_PARTITION_EVENT = "ADD_PARTITION";
  public static final String HCAT_DROP_PARTITION_EVENT = "DROP_PARTITION";
  public static final String HCAT_PARTITION_DONE_EVENT = "PARTITION_DONE";
  public static final String HCAT_CREATE_TABLE_EVENT = "CREATE_TABLE";
  public static final String HCAT_DROP_TABLE_EVENT = "DROP_TABLE";
  public static final String HCAT_CREATE_DATABASE_EVENT = "CREATE_DATABASE";
  public static final String HCAT_DROP_DATABASE_EVENT = "DROP_DATABASE";

  // Message version/format identifiers and message factory configuration.
  public static final String HCAT_MESSAGE_VERSION = "HCAT_MESSAGE_VERSION";
  public static final String HCAT_MESSAGE_FORMAT = "HCAT_MESSAGE_FORMAT";
  public static final String CONF_LABEL_HCAT_MESSAGE_FACTORY_IMPL_PREFIX =
    "hcatalog.message.factory.impl.";
  public static final String CONF_LABEL_HCAT_MESSAGE_FORMAT =
    "hcatalog.message.format";
  public static final String DEFAULT_MESSAGE_FACTORY_IMPL =
    "org.apache.hive.hcatalog.messaging.json.JSONMessageFactory";

  // ---------------------------------------------------------------------------
  // System environment variables.
  // ---------------------------------------------------------------------------

  public static final String SYSENV_HADOOP_TOKEN_FILE_LOCATION =
    "HADOOP_TOKEN_FILE_LOCATION";

  // ---------------------------------------------------------------------------
  // Hadoop configuration variable names.
  // ---------------------------------------------------------------------------

  public static final String CONF_MAPREDUCE_JOB_CREDENTIALS_BINARY =
    "mapreduce.job.credentials.binary";

  // ---------------------------------------------------------------------------
  // Data-related configuration properties.
  // ---------------------------------------------------------------------------

  /**
   * {@value} (default: {@value #HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER_DEFAULT}).
   * Pig versions before 0.10.0 (Pig &lt; 0.10.0) have no boolean support, and scripts
   * written for those versions will not expect boolean values after a Pig upgrade.
   * To ease integration, setting this Hadoop configuration key converts boolean
   * fields to integers.
   */
  public static final String HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER = "hcat.data.convert.boolean.to.integer";
  public static final boolean HCAT_DATA_CONVERT_BOOLEAN_TO_INTEGER_DEFAULT = false;

  /**
   * {@value} (default: {@value #HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT}).
   * Hive tables may contain tinyint and smallint columns, but some processing
   * frameworks do not support these types (Pig, for example, only has integer).
   * Enabling this property promotes tinyint and smallint columns to integer at
   * runtime. Writes to tinyint and smallint columns are still bounds-checked, and a
   * job fails if it tries to write a value outside the column bounds.
   */
  public static final String HCAT_DATA_TINY_SMALL_INT_PROMOTION = "hcat.data.tiny.small.int.promotion";
  public static final boolean HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT = false;

  /**
   * {@value} (default: {@value #HCAT_INPUT_BAD_RECORD_THRESHOLD_DEFAULT}).
   * Ratio of bad records that may be skipped silently without failing the task.
   * Useful for large data sets containing corrupt records, when it is acceptable to
   * skip some of them.
   */
  public static final String HCAT_INPUT_BAD_RECORD_THRESHOLD_KEY =
    "hcat.input.bad.record.threshold";
  public static final float HCAT_INPUT_BAD_RECORD_THRESHOLD_DEFAULT = 0.0001f;

  /**
   * {@value} (default: {@value #HCAT_INPUT_BAD_RECORD_MIN_DEFAULT}).
   * Count of bad records tolerated before
   * {@value #HCAT_INPUT_BAD_RECORD_THRESHOLD_KEY} is applied. Without it, a bad
   * record at the very start could fail the task.
   */
  public static final String HCAT_INPUT_BAD_RECORD_MIN_KEY =
    "hcat.input.bad.record.min";
  public static final int HCAT_INPUT_BAD_RECORD_MIN_DEFAULT = 2;
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy