All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.hbase.mapreduce.HRegionPartitioner Maven / Gradle / Ivy

The newest version!
/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import java.io.IOException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapred.TableOutputFormat;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.mapreduce.Partitioner;

/**
 * This is used to partition the output keys into groups of keys.
 * Keys are grouped according to the regions that currently exist
 * so that each reducer fills a single region so load is distributed.
 *
 * 

This class is not suitable as partitioner creating hfiles * for incremental bulk loads as region spread will likely change between time of * hfile creation and load time. See {@link LoadIncrementalHFiles} * and Bulk Load. * * @param The type of the key. * @param The type of the value. */ @InterfaceAudience.Public @InterfaceStability.Stable public class HRegionPartitioner extends Partitioner implements Configurable { private static final Log LOG = LogFactory.getLog(HRegionPartitioner.class); private Configuration conf = null; // Connection and locator are not cleaned up; they just die when partitioner is done. private Connection connection; private RegionLocator locator; private byte[][] startKeys; /** * Gets the partition number for a given key (hence record) given the total * number of partitions i.e. number of reduce-tasks for the job. * *

Typically a hash function on a all or a subset of the key.

* * @param key The key to be partitioned. * @param value The entry value. * @param numPartitions The total number of partitions. * @return The partition number for the key. * @see org.apache.hadoop.mapreduce.Partitioner#getPartition( * java.lang.Object, java.lang.Object, int) */ @Override public int getPartition(ImmutableBytesWritable key, VALUE value, int numPartitions) { byte[] region = null; // Only one region return 0 if (this.startKeys.length == 1){ return 0; } try { // Not sure if this is cached after a split so we could have problems // here if a region splits while mapping region = this.locator.getRegionLocation(key.get()).getRegionInfo().getStartKey(); } catch (IOException e) { LOG.error(e); } for (int i = 0; i < this.startKeys.length; i++){ if (Bytes.compareTo(region, this.startKeys[i]) == 0 ){ if (i >= numPartitions-1){ // cover if we have less reduces then regions. return (Integer.toString(i).hashCode() & Integer.MAX_VALUE) % numPartitions; } return i; } } // if above fails to find start key that match we need to return something return 0; } /** * Returns the current configuration. * * @return The current configuration. * @see org.apache.hadoop.conf.Configurable#getConf() */ @Override public Configuration getConf() { return conf; } /** * Sets the configuration. This is used to determine the start keys for the * given table. * * @param configuration The configuration to set. * @see org.apache.hadoop.conf.Configurable#setConf( * org.apache.hadoop.conf.Configuration) */ @Override public void setConf(Configuration configuration) { this.conf = HBaseConfiguration.create(configuration); try { this.connection = ConnectionFactory.createConnection(HBaseConfiguration.create(conf)); TableName tableName = TableName.valueOf(conf.get(TableOutputFormat.OUTPUT_TABLE)); this.locator = this.connection.getRegionLocator(tableName); } catch (IOException e) { LOG.error(e); } try { this.startKeys = this.locator.getStartKeys(); } catch (IOException e) { LOG.error(e); } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy