All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.hive.hcatalog.mapreduce.PartInfo Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.hive.hcatalog.mapreduce;

import java.io.IOException;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.Map;
import java.util.Properties;

import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler;
import org.apache.hive.hcatalog.data.schema.HCatSchema;
import com.facebook.presto.hive.$internal.org.slf4j.Logger;
import com.facebook.presto.hive.$internal.org.slf4j.LoggerFactory;

/**
 * Serializable description of a single partition, as read from the metadata server.
 *
 * <p>To keep the serialized form small, {@link #writeObject(ObjectOutputStream)} temporarily
 * nulls every field whose value is identical to the corresponding value in the associated
 * {@link HCatTableInfo}, writes the object, and then restores the fields. The table info
 * itself is {@code transient}; there is no custom {@code readObject}, so after deserialization
 * it is {@code null} and any suppressed field stays {@code null} until
 * {@link #setTableInfo(HCatTableInfo)} is called.
 */
public class PartInfo implements Serializable {

  private static final Logger LOG = LoggerFactory.getLogger(PartInfo.class);
  /** The serialization version */
  private static final long serialVersionUID = 1L;

  /** The partition data-schema. May be null while suppressed for serialization. */
  private HCatSchema partitionSchema;

  /** The information about which input storage handler to use */
  private String storageHandlerClassName;
  private String inputFormatClassName;
  private String outputFormatClassName;
  private String serdeClassName;

  /** HCat-specific properties set at the partition */
  private final Properties hcatProperties;

  /** The data location. */
  private final String location;

  /** The map of partition key names and their values. */
  private Map partitionValues;

  /** Job properties associated with this partition */
  Map jobProperties;

  /**
   * The table info associated with this partition.
   * Not serialized per PartInfo instance. Constant, per table.
   */
  transient HCatTableInfo tableInfo;

  /**
   * Instantiates a new hcat partition info.
   *
   * <p>Only the class names of the storage handler, its input/output formats and its SerDe
   * are retained; the handler instance itself is not stored.
   *
   * @param partitionSchema the partition schema
   * @param storageHandler the storage handler; must not be null (it is dereferenced here)
   * @param location the location
   * @param hcatProperties hcat-specific properties at the partition
   * @param jobProperties the job properties
   * @param tableInfo the table information
   */
  public PartInfo(HCatSchema partitionSchema, HiveStorageHandler storageHandler,
          String location, Properties hcatProperties,
          Map jobProperties, HCatTableInfo tableInfo) {
    this.partitionSchema = partitionSchema;
    this.location = location;
    this.hcatProperties = hcatProperties;
    this.jobProperties = jobProperties;
    this.tableInfo = tableInfo;

    this.storageHandlerClassName = storageHandler.getClass().getName();
    this.inputFormatClassName = storageHandler.getInputFormatClass().getName();
    this.serdeClassName = storageHandler.getSerDeClass().getName();
    this.outputFormatClassName = storageHandler.getOutputFormatClass().getName();
  }

  /**
   * Gets the value of partitionSchema.
   * @return the partitionSchema
   */
  public HCatSchema getPartitionSchema() {
    return partitionSchema;
  }

  /**
   * @return the storage handler class name
   */
  public String getStorageHandlerClassName() {
    return storageHandlerClassName;
  }

  /**
   * @return the inputFormatClassName
   */
  public String getInputFormatClassName() {
    return inputFormatClassName;
  }

  /**
   * @return the outputFormatClassName
   */
  public String getOutputFormatClassName() {
    return outputFormatClassName;
  }

  /**
   * @return the serdeClassName
   */
  public String getSerdeClassName() {
    return serdeClassName;
  }

  /**
   * Gets the input storage handler properties.
   * @return HCat-specific properties set at the partition
   */
  public Properties getInputStorageHandlerProperties() {
    return hcatProperties;
  }

  /**
   * Gets the value of location.
   * @return the location
   */
  public String getLocation() {
    return location;
  }

  /**
   * Sets the partition values.
   * @param partitionValues the new partition values
   */
  public void setPartitionValues(Map partitionValues) {
    this.partitionValues = partitionValues;
  }

  /**
   * Gets the partition values.
   * @return the partition values
   */
  public Map getPartitionValues() {
    return partitionValues;
  }

  /**
   * Gets the job properties.
   * @return a map of the job properties
   */
  public Map getJobProperties() {
    return jobProperties;
  }

  /**
   * Gets the HCatalog table information.
   * @return the table information
   */
  public HCatTableInfo getTableInfo() {
    return tableInfo;
  }

  /**
   * Attaches the table info and re-populates any field that is currently null
   * (for example, one suppressed during serialization) from it.
   * @param thatTableInfo the table information to associate with this partition
   */
  void setTableInfo(HCatTableInfo thatTableInfo) {
    this.tableInfo = thatTableInfo;
    restoreLocalInfoFromTableInfo();
  }

  /**
   * Undoes the effects of compression( dedupWithTableInfo() ) during serialization,
   * and restores PartInfo fields to return original data.
   * Only fields that are currently null are overwritten, so it can be called
   * idempotently, repeatably.
   */
  private void restoreLocalInfoFromTableInfo() {
    assert tableInfo != null : "TableInfo can't be null at this point.";
    if (partitionSchema == null) {
      partitionSchema = tableInfo.getDataColumns();
    }

    if (storageHandlerClassName == null) {
      storageHandlerClassName = tableInfo.getStorerInfo().getStorageHandlerClass();
    }

    if (inputFormatClassName == null) {
      inputFormatClassName = tableInfo.getStorerInfo().getIfClass();
    }

    if (outputFormatClassName == null) {
      outputFormatClassName = tableInfo.getStorerInfo().getOfClass();
    }

    if (serdeClassName == null) {
      serdeClassName = tableInfo.getStorerInfo().getSerdeClass();
    }
  }

  /**
   * Finds commonalities with TableInfo, and suppresses (nulls) fields if they are identical.
   * Fields that differ from the table-level value are kept, and the difference is logged
   * at debug level.
   */
  private void dedupWithTableInfo() {
    assert tableInfo != null : "TableInfo can't be null at this point.";
    if (partitionSchema != null) {
      if (partitionSchema.equals(tableInfo.getDataColumns())) {
        partitionSchema = null;
      } else if (LOG.isDebugEnabled()) {
        LOG.debug("Can't suppress data-schema. Partition-schema and table-schema seem to differ! "
            + " partitionSchema: " + partitionSchema.getFields()
            + " tableSchema: " + tableInfo.getDataColumns());
      }
    }

    if (storageHandlerClassName != null) {
      if (storageHandlerClassName.equals(tableInfo.getStorerInfo().getStorageHandlerClass())) {
        storageHandlerClassName = null;
      } else if (LOG.isDebugEnabled()) {
        LOG.debug("Partition's storageHandler (" + storageHandlerClassName + ") " +
            "differs from table's storageHandler (" + tableInfo.getStorerInfo().getStorageHandlerClass() + ").");
      }
    }

    if (inputFormatClassName != null) {
      if (inputFormatClassName.equals(tableInfo.getStorerInfo().getIfClass())) {
        inputFormatClassName = null;
      } else if (LOG.isDebugEnabled()) {
        LOG.debug("Partition's InputFormat (" + inputFormatClassName + ") " +
            "differs from table's InputFormat (" + tableInfo.getStorerInfo().getIfClass() + ").");
      }
    }

    if (outputFormatClassName != null) {
      if (outputFormatClassName.equals(tableInfo.getStorerInfo().getOfClass())) {
        outputFormatClassName = null;
      } else if (LOG.isDebugEnabled()) {
        LOG.debug("Partition's OutputFormat (" + outputFormatClassName + ") " +
            "differs from table's OutputFormat (" + tableInfo.getStorerInfo().getOfClass() + ").");
      }
    }

    if (serdeClassName != null) {
      if (serdeClassName.equals(tableInfo.getStorerInfo().getSerdeClass())) {
        serdeClassName = null;
      } else if (LOG.isDebugEnabled()) {
        LOG.debug("Partition's SerDe (" + serdeClassName + ") " +
            "differs from table's SerDe (" + tableInfo.getStorerInfo().getSerdeClass() + ").");
      }
    }
  }

  /**
   * Serialization method used by java serialization.
   * Suppresses serialization of redundant information that's already available from
   * TableInfo before writing out, so as to minimize amount of serialized space but
   * restore it back before returning, so that PartInfo object is still usable afterwards
   * (See HIVE-8485 and HIVE-11344 for details.)
   *
   * <p>The restore runs in a {@code finally} block so that a failed write does not leave
   * this instance with its fields nulled out.
   *
   * @param oos the stream to write this object's non-transient fields to
   * @throws IOException if writing to the stream fails
   */
  private void writeObject(ObjectOutputStream oos)
      throws IOException {
    dedupWithTableInfo();
    try {
      oos.defaultWriteObject();
    } finally {
      // Always undo the suppression, even when the write throws.
      restoreLocalInfoFromTableInfo();
    }
  }


}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy