/**
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.mapreduce;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.io.compress.Compression.Algorithm;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import com.google.common.annotations.VisibleForTesting;
/**
* Writes HFiles. Passed KeyValues must arrive in order.
* Writes current time as the sequence id for the file. Sets the major compacted
* attribute on created hfiles. Calling write(null,null) will forcibly roll
* all HFiles being written.
* <p>
* Using this class as part of a MapReduce job is best done
* using {@link #configureIncrementalLoad(Job, HTable)}.
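* <p>
* A minimal end-to-end sketch (the table name, output path, and job name below
* are illustrative only, not part of this API):
* <pre>{@code
* Job job = Job.getInstance(conf, "bulk-load-prep");
* HTable table = new HTable(conf, "example_table");
* HFileOutputFormat.configureIncrementalLoad(job, table);
* FileOutputFormat.setOutputPath(job, new Path("/tmp/hfiles"));
* if (job.waitForCompletion(true)) {
*   // Hand the generated HFiles to the region servers.
*   new LoadIncrementalHFiles(conf).doBulkLoad(new Path("/tmp/hfiles"), table);
* }
* }</pre>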
* @see KeyValueSortReducer
* @deprecated use {@link HFileOutputFormat2} instead.
*/
@Deprecated
@InterfaceAudience.Public
@InterfaceStability.Stable
public class HFileOutputFormat extends FileOutputFormat<ImmutableBytesWritable, KeyValue> {
private static final Log LOG = LogFactory.getLog(HFileOutputFormat.class);
// This constant is public since the client can modify this when setting
// up their conf object and thus refer to this symbol.
// It is present for backwards compatibility reasons. Use it only to
// override the auto-detection of datablock encoding.
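// For example (the value must be a DataBlockEncoding enum name; "FAST_DIFF" is
// just one valid choice):
//   conf.set(DATABLOCK_ENCODING_OVERRIDE_CONF_KEY, "FAST_DIFF");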
public static final String DATABLOCK_ENCODING_OVERRIDE_CONF_KEY =
HFileOutputFormat2.DATABLOCK_ENCODING_OVERRIDE_CONF_KEY;
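/**
* Delegates writer creation to {@link HFileOutputFormat2}; see the class
* comment for how the written KeyValues must be ordered.
*/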
@Override
public RecordWriter<ImmutableBytesWritable, KeyValue> getRecordWriter(
final TaskAttemptContext context) throws IOException, InterruptedException {
return HFileOutputFormat2.createRecordWriter(context, this.getOutputCommitter(context));
}
/**
* Configure a MapReduce Job to perform an incremental load into the given
* table. This
* <ul>
*   <li>Inspects the table to configure a total order partitioner</li>
*   <li>Uploads the partitions file to the cluster and adds it to the DistributedCache</li>
*   <li>Sets the number of reduce tasks to match the current number of regions</li>
*   <li>Sets the output key/value class to match HFileOutputFormat's requirements</li>
*   <li>Sets the reducer up to perform the appropriate sorting (either KeyValueSortReducer or
*   PutSortReducer)</li>
* </ul>
* The user should be sure to set the map output value class to either KeyValue or Put before
* running this function.
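*
* <p>For example, before calling this method (the mapper class name here is
* illustrative, not part of this API):
* <pre>{@code
* job.setMapperClass(MyPutEmittingMapper.class); // hypothetical mapper emitting Puts
* job.setMapOutputKeyClass(ImmutableBytesWritable.class);
* job.setMapOutputValueClass(Put.class); // or KeyValue.class
* }</pre>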
*/
public static void configureIncrementalLoad(Job job, HTable table)
throws IOException {
HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(),
table.getRegionLocator());
}
/**
* Runs inside the task to deserialize column family to compression algorithm
* map from the configuration.
*
* @param conf to read the serialized values from
* @return a map from column family to the configured compression algorithm
*/
@VisibleForTesting
static Map<byte[], Algorithm> createFamilyCompressionMap(Configuration conf) {
return HFileOutputFormat2.createFamilyCompressionMap(conf);
}
/**
* Runs inside the task to deserialize column family to bloom filter type
* map from the configuration.
*
* @param conf to read the serialized values from
* @return a map from column family to the configured bloom filter type
*/
@VisibleForTesting
static Map<byte[], BloomType> createFamilyBloomTypeMap(Configuration conf) {
return HFileOutputFormat2.createFamilyBloomTypeMap(conf);
}
/**
* Runs inside the task to deserialize column family to block size
* map from the configuration.
*
* @param conf to read the serialized values from
* @return a map from column family to the configured block size
*/
@VisibleForTesting
static Map<byte[], Integer> createFamilyBlockSizeMap(Configuration conf) {
return HFileOutputFormat2.createFamilyBlockSizeMap(conf);
}
/**
* Runs inside the task to deserialize column family to data block encoding
* type map from the configuration.
*
* @param conf to read the serialized values from
* @return a map from column family to the configured data block encoding
* for the family
*/
@VisibleForTesting
static Map<byte[], DataBlockEncoding> createFamilyDataBlockEncodingMap(Configuration conf) {
return HFileOutputFormat2.createFamilyDataBlockEncodingMap(conf);
}
/**
* Configure <code>job</code> with a TotalOrderPartitioner, partitioning against
* <code>splitPoints</code>. Cleans up the partitions file after the job exits.
*/
static void configurePartitioner(Job job, List<ImmutableBytesWritable> splitPoints)
throws IOException {
HFileOutputFormat2.configurePartitioner(job, splitPoints);
}
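/**
* Serialize column family to compression algorithm map to configuration.
* Invoked while configuring the MR job for incremental load.
*
* @param table to read the properties from
* @param conf to persist serialized values into
* @throws IOException
* on failure to read column family descriptors
*/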
static void configureCompression(Table table, Configuration conf) throws IOException {
HFileOutputFormat2.configureCompression(conf, table.getTableDescriptor());
}
/**
* Serialize column family to block size map to configuration.
* Invoked while configuring the MR job for incremental load.
*
* @param table to read the properties from
* @param conf to persist serialized values into
* @throws IOException
* on failure to read column family descriptors
*/
@VisibleForTesting
static void configureBlockSize(Table table, Configuration conf) throws IOException {
HFileOutputFormat2.configureBlockSize(table.getTableDescriptor(), conf);
}
/**
* Serialize column family to bloom type map to configuration.
* Invoked while configuring the MR job for incremental load.
*
* @param table to read the properties from
* @param conf to persist serialized values into
* @throws IOException
* on failure to read column family descriptors
*/
@VisibleForTesting
static void configureBloomType(Table table, Configuration conf) throws IOException {
HFileOutputFormat2.configureBloomType(table.getTableDescriptor(), conf);
}
/**
* Serialize column family to data block encoding map to configuration.
* Invoked while configuring the MR job for incremental load.
*
* @param table to read the properties from
* @param conf to persist serialized values into
* @throws IOException
* on failure to read column family descriptors
*/
@VisibleForTesting
static void configureDataBlockEncoding(Table table,
Configuration conf) throws IOException {
HTableDescriptor tableDescriptor = table.getTableDescriptor();
HFileOutputFormat2.configureDataBlockEncoding(tableDescriptor, conf);
}
}