// org.apache.hive.hcatalog.mapreduce.PartInfo Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.hive.hcatalog.mapreduce;
import java.io.IOException;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.Map;
import java.util.Properties;
import org.apache.hadoop.hive.ql.metadata.HiveStorageHandler;
import org.apache.hive.hcatalog.data.schema.HCatSchema;
import com.facebook.presto.hive.$internal.org.slf4j.Logger;
import com.facebook.presto.hive.$internal.org.slf4j.LoggerFactory;
/**
 * Serializable holder for the partition information read from the metadata server
 * that maps to a single partition.
 *
 * <p>To keep the serialized form small, fields that are identical to the corresponding
 * values in the (transient) {@link HCatTableInfo} are nulled out while the object is being
 * written and restored from the table info afterwards. After deserialization the table
 * info has to be re-attached via {@code setTableInfo} so that the suppressed fields
 * are filled in again.
 */
public class PartInfo implements Serializable {

  private static final Logger LOG = LoggerFactory.getLogger(PartInfo.class);

  /** The serialization version */
  private static final long serialVersionUID = 1L;

  /** The partition data-schema. May be null while suppressed for serialization. */
  private HCatSchema partitionSchema;

  /** The information about which input storage handler to use */
  private String storageHandlerClassName;

  /** Name of the input format class reported by the storage handler. */
  private String inputFormatClassName;

  /** Name of the output format class reported by the storage handler. */
  private String outputFormatClassName;

  /** Name of the SerDe class reported by the storage handler. */
  private String serdeClassName;

  /** HCat-specific properties set at the partition */
  private final Properties hcatProperties;

  /** The data location. */
  private final String location;

  /** The map of partition key names and their values. */
  private Map partitionValues;

  /** Job properties associated with this partition */
  Map jobProperties;

  /**
   * The table info associated with this partition.
   * Not serialized per PartInfo instance. Constant, per table.
   */
  transient HCatTableInfo tableInfo;

  /**
   * Instantiates a new hcat partition info.
   *
   * <p>The storage handler itself is not retained; only the names of its own class and of
   * its input format, output format and SerDe classes are recorded.
   *
   * @param partitionSchema the partition schema
   * @param storageHandler the storage handler; must not be null, it is dereferenced here
   * @param location the location
   * @param hcatProperties hcat-specific properties at the partition
   * @param jobProperties the job properties
   * @param tableInfo the table information
   */
  public PartInfo(HCatSchema partitionSchema, HiveStorageHandler storageHandler,
                  String location, Properties hcatProperties,
                  Map jobProperties, HCatTableInfo tableInfo) {
    this.partitionSchema = partitionSchema;
    this.location = location;
    this.hcatProperties = hcatProperties;
    this.jobProperties = jobProperties;
    this.tableInfo = tableInfo;

    this.storageHandlerClassName = storageHandler.getClass().getName();
    this.inputFormatClassName = storageHandler.getInputFormatClass().getName();
    this.serdeClassName = storageHandler.getSerDeClass().getName();
    this.outputFormatClassName = storageHandler.getOutputFormatClass().getName();
  }

  /**
   * Gets the value of partitionSchema.
   * @return the partitionSchema
   */
  public HCatSchema getPartitionSchema() {
    return partitionSchema;
  }

  /**
   * @return the storage handler class name
   */
  public String getStorageHandlerClassName() {
    return storageHandlerClassName;
  }

  /**
   * @return the inputFormatClassName
   */
  public String getInputFormatClassName() {
    return inputFormatClassName;
  }

  /**
   * @return the outputFormatClassName
   */
  public String getOutputFormatClassName() {
    return outputFormatClassName;
  }

  /**
   * @return the serdeClassName
   */
  public String getSerdeClassName() {
    return serdeClassName;
  }

  /**
   * Gets the input storage handler properties.
   * @return HCat-specific properties set at the partition
   */
  public Properties getInputStorageHandlerProperties() {
    return hcatProperties;
  }

  /**
   * Gets the value of location.
   * @return the location
   */
  public String getLocation() {
    return location;
  }

  /**
   * Sets the partition values.
   * @param partitionValues the new partition values
   */
  public void setPartitionValues(Map partitionValues) {
    this.partitionValues = partitionValues;
  }

  /**
   * Gets the partition values.
   * @return the partition values
   */
  public Map getPartitionValues() {
    return partitionValues;
  }

  /**
   * Gets the job properties.
   * @return a map of the job properties
   */
  public Map getJobProperties() {
    return jobProperties;
  }

  /**
   * Gets the HCatalog table information.
   * @return the table information
   */
  public HCatTableInfo getTableInfo() {
    return tableInfo;
  }

  /**
   * Attaches the table information and fills in every field that is currently null
   * from it.
   * @param thatTableInfo the table information to attach
   */
  void setTableInfo(HCatTableInfo thatTableInfo) {
    this.tableInfo = thatTableInfo;
    restoreLocalInfoFromTableInfo();
  }

  /**
   * Undoes the effects of compression( dedupWithTableInfo() ) during serialization,
   * and restores PartInfo fields to return original data.
   * Only fields that are currently null are overwritten, so this can be called
   * idempotently, repeatably.
   */
  private void restoreLocalInfoFromTableInfo() {
    assert tableInfo != null : "TableInfo can't be null at this point.";
    if (partitionSchema == null) {
      partitionSchema = tableInfo.getDataColumns();
    }
    if (storageHandlerClassName == null) {
      storageHandlerClassName = tableInfo.getStorerInfo().getStorageHandlerClass();
    }
    if (inputFormatClassName == null) {
      inputFormatClassName = tableInfo.getStorerInfo().getIfClass();
    }
    if (outputFormatClassName == null) {
      outputFormatClassName = tableInfo.getStorerInfo().getOfClass();
    }
    if (serdeClassName == null) {
      serdeClassName = tableInfo.getStorerInfo().getSerdeClass();
    }
  }

  /**
   * Finds commonalities with TableInfo, and suppresses (nulls) fields if they are identical.
   * Fields that differ from the table-level value are kept and the difference is logged
   * at debug level.
   */
  private void dedupWithTableInfo() {
    assert tableInfo != null : "TableInfo can't be null at this point.";
    if (partitionSchema != null) {
      if (partitionSchema.equals(tableInfo.getDataColumns())) {
        partitionSchema = null;
      } else if (LOG.isDebugEnabled()) {
        LOG.debug("Can't suppress data-schema. Partition-schema and table-schema seem to differ! "
            + " partitionSchema: " + partitionSchema.getFields()
            + " tableSchema: " + tableInfo.getDataColumns());
      }
    }
    // The table-side value is only looked up when the partition-side field is set,
    // so an already-suppressed field never touches the storer info.
    if (storageHandlerClassName != null) {
      storageHandlerClassName = suppressIfSameAsTable("storageHandler",
          storageHandlerClassName, tableInfo.getStorerInfo().getStorageHandlerClass());
    }
    if (inputFormatClassName != null) {
      inputFormatClassName = suppressIfSameAsTable("InputFormat",
          inputFormatClassName, tableInfo.getStorerInfo().getIfClass());
    }
    if (outputFormatClassName != null) {
      outputFormatClassName = suppressIfSameAsTable("OutputFormat",
          outputFormatClassName, tableInfo.getStorerInfo().getOfClass());
    }
    if (serdeClassName != null) {
      serdeClassName = suppressIfSameAsTable("SerDe",
          serdeClassName, tableInfo.getStorerInfo().getSerdeClass());
    }
  }

  /**
   * Decides whether a partition-level class name can be dropped because the table
   * carries the same value.
   *
   * @param label name of the attribute, used only in the debug message
   * @param partitionValue the partition-level class name; must not be null
   * @param tableValue the table-level class name, may be null
   * @return null if both values are equal, otherwise {@code partitionValue} unchanged
   */
  private static String suppressIfSameAsTable(String label, String partitionValue,
                                              String tableValue) {
    if (partitionValue.equals(tableValue)) {
      return null;
    }
    if (LOG.isDebugEnabled()) {
      LOG.debug("Partition's " + label + " (" + partitionValue + ") "
          + "differs from table's " + label + " (" + tableValue + ").");
    }
    return partitionValue;
  }

  /**
   * Serialization method used by java serialization.
   * Suppresses serialization of redundant information that's already available from
   * TableInfo before writing out, so as to minimize amount of serialized space but
   * restore it back before returning, so that PartInfo object is still usable afterwards
   * (See HIVE-8485 and HIVE-11344 for details.)
   *
   * @param oos the stream the default serialized form is written to
   * @throws IOException if writing to the stream fails
   */
  private void writeObject(ObjectOutputStream oos)
    throws IOException {
    try {
      dedupWithTableInfo();
      oos.defaultWriteObject();
    } finally {
      // Restore even when deduplication or the write fails, otherwise this instance
      // would be left with nulled-out fields.
      restoreLocalInfoFromTableInfo();
    }
  }
}