/**
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.util;
import java.io.IOException;
import java.math.BigInteger;
import java.util.Arrays;
import java.util.Collection;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.ClusterStatus;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MetaTableAccessor;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.ClusterConnection;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.NoServerForRegionException;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.regionserver.HRegionFileSystem;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
/**
* The {@link RegionSplitter} class provides several utilities to help in the
* administration lifecycle for developers who choose to manually split regions
* instead of having HBase handle that automatically. The most useful utilities
* are:
 * <p>
 * <ul>
 * <li>Create a table with a specified number of pre-split regions
 * <li>Execute a rolling split of all regions on an existing table
 * </ul>
 * <p>
* Both operations can be safely done on a live server.
*
* Question: How do I turn off automatic splitting?
* Answer: Automatic splitting is determined by the configuration value
* HConstants.HREGION_MAX_FILESIZE. It is not recommended that you set this
* to Long.MAX_VALUE in case you forget about manual splits. A suggested setting
* is 100GB, which would result in > 1hr major compactions if reached.
*
* Question: Why did the original authors decide to manually split?
* Answer: Specific workload characteristics of our use case allowed us
* to benefit from a manual split system.
*
 * <p>
 * <ul>
 * <li>Data (~1k) that would grow instead of being replaced
 * <li>Data growth was roughly uniform across all regions
 * <li>OLTP workload. Data loss is a big deal.
 * </ul>
*
* Question: Why is manual splitting good for this workload?
* Answer: Although automated splitting is not a bad option, there are
* benefits to manual splitting.
*
 * <p>
 * <ul>
 * <li>With growing amounts of data, splits will continually be needed. Since
 * you always know exactly what regions you have, long-term debugging and
 * profiling is much easier with manual splits. It is hard to trace the logs to
 * understand region level problems if it keeps splitting and getting renamed.
 * <li>Data offlining bugs + unknown number of split regions == oh crap! If a
 * WAL or StoreFile was mistakenly unprocessed by HBase due to a weird bug and
 * you notice it a day or so later, you can be assured that the regions
 * specified in these files are the same as the current regions and you have
 * fewer headaches trying to restore/replay your data.
 * <li>You can finely tune your compaction algorithm. With roughly uniform data
 * growth, it's easy to cause split / compaction storms as the regions all
 * roughly hit the same data size at the same time. With manual splits, you can
 * let staggered, time-based major compactions spread out your network IO load.
 * </ul>
*
* Question: What's the optimal number of pre-split regions to create?
* Answer: Mileage will vary depending upon your application.
*
* The short answer for our application is that we started with 10 pre-split
 * regions / server and watched our data growth over time. It's better to err on
 * the side of too few regions and rolling split later.
*
* The more complicated answer is that this depends upon the largest storefile
* in your region. With a growing data size, this will get larger over time. You
* want the largest region to be just big enough that the
* {@link org.apache.hadoop.hbase.regionserver.HStore} compact
* selection algorithm only compacts it due to a timed major. If you don't, your
* cluster can be prone to compaction storms as the algorithm decides to run
* major compactions on a large series of regions all at once. Note that
* compaction storms are due to the uniform data growth, not the manual split
* decision.
*
* If you pre-split your regions too thin, you can increase the major compaction
* interval by configuring HConstants.MAJOR_COMPACTION_PERIOD. If your data size
* grows too large, use this script to perform a network IO safe rolling split
* of all regions.
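 *
 * <p>
 * As an illustrative sketch (the table and column family names below are
 * placeholders, not part of this class's contract), an equivalent pre-split
 * table can also be created programmatically through the public
 * {@link org.apache.hadoop.hbase.client.Admin} API with one of the built-in
 * split algorithms:
 *
 * <pre>{@code
 * Configuration conf = HBaseConfiguration.create();
 * try (Connection conn = ConnectionFactory.createConnection(conf);
 *     Admin admin = conn.getAdmin()) {
 *   HTableDescriptor desc = new HTableDescriptor(TableName.valueOf("myTable"));
 *   desc.addFamily(new HColumnDescriptor("cf"));
 *   // HexStringSplit.split(60) returns the 59 boundary keys for 60 regions
 *   admin.createTable(desc, new RegionSplitter.HexStringSplit().split(60));
 * }
 * }</pre>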
*/
@InterfaceAudience.Private
public class RegionSplitter {
private static final Log LOG = LogFactory.getLog(RegionSplitter.class);
/**
  * A generic interface for the RegionSplitter code to use for all its
* functionality. Note that the original authors of this code use
* {@link HexStringSplit} to partition their table and set it as default, but
* provided this for your custom algorithm. To use, create a new derived class
* from this interface and call {@link RegionSplitter#createPresplitTable} or
* RegionSplitter#rollingSplit(TableName, SplitAlgorithm, Configuration) with the
* argument splitClassName giving the name of your class.
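  *
  * <p>
  * For illustration, a custom implementation can be loaded by its fully
  * qualified class name ({@code com.example.MySplit} is a hypothetical class,
  * not shipped with HBase):
  *
  * <pre>{@code
  * Configuration conf = HBaseConfiguration.create();
  * SplitAlgorithm algo =
  *     RegionSplitter.newSplitAlgoInstance(conf, "com.example.MySplit");
  * byte[][] boundaries = algo.split(16); // 15 boundary keys for 16 regions
  * }</pre>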
*/
public interface SplitAlgorithm {
/**
* Split a pre-existing region into 2 regions.
*
* @param start
* first row (inclusive)
* @param end
* last row (exclusive)
* @return the split row to use
*/
byte[] split(byte[] start, byte[] end);
/**
* Split an entire table.
*
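     * <p>
     * For example, with the built-in {@link HexStringSplit}:
     *
     * <pre>{@code
     * byte[][] keys = new HexStringSplit().split(4);
     * // keys are the ASCII bytes of "40000000", "80000000", "c0000000"
     * }</pre>
     *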
* @param numRegions
* number of regions to split the table into
*
* @throws RuntimeException
* user input is validated at this time. may throw a runtime
* exception in response to a parse failure
* @return array of split keys for the initial regions of the table. The
* length of the returned array should be numRegions-1.
*/
byte[][] split(int numRegions);
/**
* Some MapReduce jobs may want to run multiple mappers per region,
* this is intended for such usecase.
*
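     * <p>
     * For example, with the built-in {@link HexStringSplit}:
     *
     * <pre>{@code
     * // the three interior points between the two keys (inclusive == false)
     * byte[][] pts = new HexStringSplit().split(Bytes.toBytes("00000000"),
     *     Bytes.toBytes("40000000"), 4, false);
     * // pts are the ASCII bytes of "10000000", "20000000", "30000000"
     * }</pre>
     *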
* @param start first row (inclusive)
* @param end last row (exclusive)
* @param numSplits number of splits to generate
* @param inclusive whether start and end are returned as split points
*/
byte[][] split(byte[] start, byte[] end, int numSplits, boolean inclusive);
/**
* In HBase, the first row is represented by an empty byte array. This might
* cause problems with your split algorithm or row printing. All your APIs
* will be passed firstRow() instead of empty array.
*
* @return your representation of your first row
*/
byte[] firstRow();
/**
* In HBase, the last row is represented by an empty byte array. This might
* cause problems with your split algorithm or row printing. All your APIs
    * will be passed lastRow() instead of empty array.
*
* @return your representation of your last row
*/
byte[] lastRow();
/**
    * In HBase, the first row is represented by an empty byte array. Set this
* value to help the split code understand how to evenly divide the first
* region.
*
* @param userInput
* raw user input (may throw RuntimeException on parse failure)
*/
void setFirstRow(String userInput);
/**
* In HBase, the last row is represented by an empty byte array. Set this
* value to help the split code understand how to evenly divide the last
* region. Note that this last row is inclusive for all rows sharing the
* same prefix.
*
* @param userInput
* raw user input (may throw RuntimeException on parse failure)
*/
void setLastRow(String userInput);
/**
* @param input
* user or file input for row
* @return byte array representation of this row for HBase
*/
byte[] strToRow(String input);
/**
* @param row
* byte array representing a row in HBase
* @return String to use for debug & file printing
*/
String rowToStr(byte[] row);
/**
* @return the separator character to use when storing / printing the row
*/
String separator();
/**
* Set the first row
* @param userInput byte array of the row key.
*/
void setFirstRow(byte[] userInput);
/**
* Set the last row
* @param userInput byte array of the row key.
*/
void setLastRow(byte[] userInput);
}
/**
* The main function for the RegionSplitter application. Common uses:
  * <p>
  * <ul>
  * <li>create a table named 'myTable' with 60 pre-split regions containing 2
  * column families 'test' & 'rs', assuming the keys are hex-encoded ASCII:
  * <ul>
  * <li>bin/hbase org.apache.hadoop.hbase.util.RegionSplitter -c 60 -f test:rs
  * myTable HexStringSplit
  * </ul>
  * <li>create a table named 'myTable' with 50 pre-split regions,
  * assuming the keys are decimal-encoded ASCII:
  * <ul>
  * <li>bin/hbase org.apache.hadoop.hbase.util.RegionSplitter -c 50
  * myTable DecimalStringSplit
  * </ul>
  * <li>perform a rolling split of 'myTable' (i.e. 60 => 120 regions), with at
  * most 2 outstanding splits at a time, assuming keys are uniformly
  * distributed bytes:
  * <ul>
  * <li>bin/hbase org.apache.hadoop.hbase.util.RegionSplitter -r -o 2 myTable
  * UniformSplit
  * </ul>
  * </ul>
  *
* There are three SplitAlgorithms built into RegionSplitter, HexStringSplit,
* DecimalStringSplit, and UniformSplit. These are different strategies for
* choosing region boundaries. See their source code for details.
*
* @param args
* Usage: RegionSplitter <TABLE> <SPLITALGORITHM>
* <-c <# regions> -f <family:family:...> | -r
* [-o <# outstanding splits>]>
* [-D <conf.param=value>]
* @throws IOException
* HBase IO problem
* @throws InterruptedException
* user requested exit
* @throws ParseException
* problem parsing user input
*/
@SuppressWarnings("static-access")
public static void main(String[] args) throws IOException,
InterruptedException, ParseException {
Configuration conf = HBaseConfiguration.create();
// parse user input
Options opt = new Options();
opt.addOption(OptionBuilder.withArgName("property=value").hasArg()
.withDescription("Override HBase Configuration Settings").create("D"));
opt.addOption(OptionBuilder.withArgName("region count").hasArg()
.withDescription(
"Create a new table with a pre-split number of regions")
.create("c"));
opt.addOption(OptionBuilder.withArgName("family:family:...").hasArg()
.withDescription(
"Column Families to create with new table. Required with -c")
.create("f"));
opt.addOption("h", false, "Print this usage help");
opt.addOption("r", false, "Perform a rolling split of an existing region");
opt.addOption(OptionBuilder.withArgName("count").hasArg().withDescription(
"Max outstanding splits that have unfinished major compactions")
.create("o"));
opt.addOption(null, "firstrow", true,
"First Row in Table for Split Algorithm");
opt.addOption(null, "lastrow", true,
"Last Row in Table for Split Algorithm");
opt.addOption(null, "risky", false,
"Skip verification steps to complete quickly."
+ "STRONGLY DISCOURAGED for production systems. ");
CommandLine cmd = new GnuParser().parse(opt, args);
if (cmd.hasOption("D")) {
for (String confOpt : cmd.getOptionValues("D")) {
String[] kv = confOpt.split("=", 2);
if (kv.length == 2) {
conf.set(kv[0], kv[1]);
LOG.debug("-D configuration override: " + kv[0] + "=" + kv[1]);
} else {
throw new ParseException("-D option format invalid: " + confOpt);
}
}
}
if (cmd.hasOption("risky")) {
conf.setBoolean("split.verify", false);
}
boolean createTable = cmd.hasOption("c") && cmd.hasOption("f");
boolean rollingSplit = cmd.hasOption("r");
boolean oneOperOnly = createTable ^ rollingSplit;
if (2 != cmd.getArgList().size() || !oneOperOnly || cmd.hasOption("h")) {
      new HelpFormatter().printHelp("RegionSplitter <TABLE> <SPLITALGORITHM>\n"+
"SPLITALGORITHM is a java class name of a class implementing " +
"SplitAlgorithm, or one of the special strings HexStringSplit or " +
"DecimalStringSplit or UniformSplit, which are built-in split algorithms. " +
"HexStringSplit treats keys as hexadecimal ASCII, and " +
"DecimalStringSplit treats keys as decimal ASCII, and " +
"UniformSplit treats keys as arbitrary bytes.", opt);
return;
}
TableName tableName = TableName.valueOf(cmd.getArgs()[0]);
String splitClass = cmd.getArgs()[1];
SplitAlgorithm splitAlgo = newSplitAlgoInstance(conf, splitClass);
if (cmd.hasOption("firstrow")) {
splitAlgo.setFirstRow(cmd.getOptionValue("firstrow"));
}
if (cmd.hasOption("lastrow")) {
splitAlgo.setLastRow(cmd.getOptionValue("lastrow"));
}
if (createTable) {
conf.set("split.count", cmd.getOptionValue("c"));
createPresplitTable(tableName, splitAlgo, cmd.getOptionValue("f").split(":"), conf);
}
if (rollingSplit) {
if (cmd.hasOption("o")) {
conf.set("split.outstanding", cmd.getOptionValue("o"));
}
rollingSplit(tableName, splitAlgo, conf);
}
}
static void createPresplitTable(TableName tableName, SplitAlgorithm splitAlgo,
String[] columnFamilies, Configuration conf)
throws IOException, InterruptedException {
final int splitCount = conf.getInt("split.count", 0);
Preconditions.checkArgument(splitCount > 1, "Split count must be > 1");
Preconditions.checkArgument(columnFamilies.length > 0,
"Must specify at least one column family. ");
LOG.debug("Creating table " + tableName + " with " + columnFamilies.length
+ " column families. Presplitting to " + splitCount + " regions");
HTableDescriptor desc = new HTableDescriptor(tableName);
for (String cf : columnFamilies) {
desc.addFamily(new HColumnDescriptor(Bytes.toBytes(cf)));
}
try (Connection connection = ConnectionFactory.createConnection(conf)) {
Admin admin = connection.getAdmin();
try {
Preconditions.checkArgument(!admin.tableExists(tableName),
"Table already exists: " + tableName);
admin.createTable(desc, splitAlgo.split(splitCount));
} finally {
admin.close();
}
LOG.debug("Table created! Waiting for regions to show online in META...");
      if (conf.getBoolean("split.verify", true)) {
// NOTE: createTable is synchronous on the table, but not on the regions
int onlineRegions = 0;
while (onlineRegions < splitCount) {
onlineRegions = MetaTableAccessor.getRegionCount(connection, tableName);
LOG.debug(onlineRegions + " of " + splitCount + " regions online...");
if (onlineRegions < splitCount) {
Thread.sleep(10 * 1000); // sleep
}
}
}
LOG.debug("Finished creating table with " + splitCount + " regions");
}
}
  /**
   * Replacement for the removed getCurrentNrHRS method.
   * @param connection connection to the cluster
   * @return Rough count of regionservers out on the cluster.
   * @throws IOException if a remote or network exception occurs
   */
private static int getRegionServerCount(final Connection connection) throws IOException {
try (Admin admin = connection.getAdmin()) {
ClusterStatus status = admin.getClusterStatus();
      Collection<ServerName> servers = status.getServers();
      return servers == null || servers.isEmpty() ? 0 : servers.size();
}
}
private static byte [] readFile(final FileSystem fs, final Path path) throws IOException {
FSDataInputStream tmpIn = fs.open(path);
try {
byte [] rawData = new byte[tmpIn.available()];
tmpIn.readFully(rawData);
return rawData;
} finally {
tmpIn.close();
}
}
static void rollingSplit(TableName tableName, SplitAlgorithm splitAlgo, Configuration conf)
throws IOException, InterruptedException {
final int minOS = conf.getInt("split.outstanding", 2);
try (Connection connection = ConnectionFactory.createConnection(conf)) {
// Max outstanding splits. default == 50% of servers
final int MAX_OUTSTANDING = Math.max(getRegionServerCount(connection) / 2, minOS);
Path hbDir = FSUtils.getRootDir(conf);
Path tableDir = FSUtils.getTableDir(hbDir, tableName);
Path splitFile = new Path(tableDir, "_balancedSplit");
FileSystem fs = FileSystem.get(conf);
// Get a list of daughter regions to create
      LinkedList<Pair<byte[], byte[]>> tmpRegionSet = null;
try (Table table = connection.getTable(tableName)) {
tmpRegionSet = getSplits(connection, tableName, splitAlgo);
}
      LinkedList<Pair<byte[], byte[]>> outstanding = Lists.newLinkedList();
int splitCount = 0;
final int origCount = tmpRegionSet.size();
// all splits must compact & we have 1 compact thread, so 2 split
// requests to the same RS can stall the outstanding split queue.
// To fix, group the regions into an RS pool and round-robin through it
LOG.debug("Bucketing regions by regionserver...");
      TreeMap<ServerName, LinkedList<Pair<byte[], byte[]>>> daughterRegions =
          Maps.newTreeMap();
// Get a regionLocator. Need it in below.
try (RegionLocator regionLocator = connection.getRegionLocator(tableName)) {
        for (Pair<byte[], byte[]> dr : tmpRegionSet) {
ServerName rsLocation = regionLocator.getRegionLocation(dr.getSecond()).getServerName();
if (!daughterRegions.containsKey(rsLocation)) {
            LinkedList<Pair<byte[], byte[]>> entry = Lists.newLinkedList();
daughterRegions.put(rsLocation, entry);
}
daughterRegions.get(rsLocation).add(dr);
}
LOG.debug("Done with bucketing. Split time!");
long startTime = System.currentTimeMillis();
// Open the split file and modify it as splits finish
byte[] rawData = readFile(fs, splitFile);
FSDataOutputStream splitOut = fs.create(splitFile);
try {
splitOut.write(rawData);
try {
// *** split code ***
while (!daughterRegions.isEmpty()) {
              LOG.debug(daughterRegions.size() + " RS have regions to split.");
// Get ServerName to region count mapping
              final TreeMap<ServerName, Integer> rsSizes = Maps.newTreeMap();
              List<HRegionLocation> hrls = regionLocator.getAllRegionLocations();
for (HRegionLocation hrl: hrls) {
ServerName sn = hrl.getServerName();
if (rsSizes.containsKey(sn)) {
rsSizes.put(sn, rsSizes.get(sn) + 1);
} else {
rsSizes.put(sn, 1);
}
}
// Round-robin through the ServerName list. Choose the lightest-loaded servers
// first to keep the master from load-balancing regions as we split.
              for (Map.Entry<ServerName, LinkedList<Pair<byte[], byte[]>>> daughterRegion :
                  daughterRegions.entrySet()) {
                Pair<byte[], byte[]> dr = null;
                ServerName rsLoc = daughterRegion.getKey();
                LinkedList<Pair<byte[], byte[]>> regionList = daughterRegion.getValue();
// Find a region in the ServerName list that hasn't been moved
LOG.debug("Finding a region on " + rsLoc);
while (!regionList.isEmpty()) {
dr = regionList.pop();
// get current region info
byte[] split = dr.getSecond();
HRegionLocation regionLoc = regionLocator.getRegionLocation(split);
// if this region moved locations
ServerName newRs = regionLoc.getServerName();
if (newRs.compareTo(rsLoc) != 0) {
LOG.debug("Region with " + splitAlgo.rowToStr(split)
+ " moved to " + newRs + ". Relocating...");
// relocate it, don't use it right now
if (!daughterRegions.containsKey(newRs)) {
                      LinkedList<Pair<byte[], byte[]>> entry = Lists.newLinkedList();
daughterRegions.put(newRs, entry);
}
daughterRegions.get(newRs).add(dr);
dr = null;
continue;
}
// make sure this region wasn't already split
byte[] sk = regionLoc.getRegionInfo().getStartKey();
if (sk.length != 0) {
if (Bytes.equals(split, sk)) {
LOG.debug("Region already split on "
+ splitAlgo.rowToStr(split) + ". Skipping this region...");
++splitCount;
dr = null;
continue;
}
byte[] start = dr.getFirst();
Preconditions.checkArgument(Bytes.equals(start, sk), splitAlgo
.rowToStr(start) + " != " + splitAlgo.rowToStr(sk));
}
// passed all checks! found a good region
break;
}
if (regionList.isEmpty()) {
daughterRegions.remove(rsLoc);
}
if (dr == null)
continue;
// we have a good region, time to split!
byte[] split = dr.getSecond();
LOG.debug("Splitting at " + splitAlgo.rowToStr(split));
try (Admin admin = connection.getAdmin()) {
admin.split(tableName, split);
}
                LinkedList<Pair<byte[], byte[]>> finished = Lists.newLinkedList();
                LinkedList<Pair<byte[], byte[]>> local_finished = Lists.newLinkedList();
if (conf.getBoolean("split.verify", true)) {
// we need to verify and rate-limit our splits
outstanding.addLast(dr);
// with too many outstanding splits, wait for some to finish
while (outstanding.size() >= MAX_OUTSTANDING) {
LOG.debug("Wait for outstanding splits " + outstanding.size());
local_finished = splitScan(outstanding, connection, tableName, splitAlgo);
if (local_finished.isEmpty()) {
Thread.sleep(30 * 1000);
} else {
finished.addAll(local_finished);
outstanding.removeAll(local_finished);
LOG.debug(local_finished.size() + " outstanding splits finished");
}
}
} else {
finished.add(dr);
}
// mark each finished region as successfully split.
                for (Pair<byte[], byte[]> region : finished) {
splitOut.writeChars("- " + splitAlgo.rowToStr(region.getFirst())
+ " " + splitAlgo.rowToStr(region.getSecond()) + "\n");
splitCount++;
if (splitCount % 10 == 0) {
long tDiff = (System.currentTimeMillis() - startTime)
/ splitCount;
LOG.debug("STATUS UPDATE: " + splitCount + " / " + origCount
+ ". Avg Time / Split = "
+ org.apache.hadoop.util.StringUtils.formatTime(tDiff));
}
}
}
}
if (conf.getBoolean("split.verify", true)) {
while (!outstanding.isEmpty()) {
LOG.debug("Finally Wait for outstanding splits " + outstanding.size());
            LinkedList<Pair<byte[], byte[]>> finished = splitScan(outstanding,
connection, tableName, splitAlgo);
if (finished.isEmpty()) {
Thread.sleep(30 * 1000);
} else {
outstanding.removeAll(finished);
              for (Pair<byte[], byte[]> region : finished) {
splitOut.writeChars("- " + splitAlgo.rowToStr(region.getFirst())
+ " " + splitAlgo.rowToStr(region.getSecond()) + "\n");
splitCount++;
}
LOG.debug("Finally " + finished.size() + " outstanding splits finished");
}
}
}
LOG.debug("All regions have been successfully split!");
} finally {
long tDiff = System.currentTimeMillis() - startTime;
LOG.debug("TOTAL TIME = "
+ org.apache.hadoop.util.StringUtils.formatTime(tDiff));
LOG.debug("Splits = " + splitCount);
if (0 < splitCount) {
LOG.debug("Avg Time / Split = "
+ org.apache.hadoop.util.StringUtils.formatTime(tDiff / splitCount));
}
}
} finally {
splitOut.close();
fs.delete(splitFile, false);
}
}
}
}
/**
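   * Instantiate a SplitAlgorithm, resolving the three built-in names
   * (HexStringSplit, DecimalStringSplit, UniformSplit) or loading a custom
   * class by its fully qualified name. For example, using a built-in name:
   *
   * <pre>{@code
   * Configuration conf = HBaseConfiguration.create();
   * SplitAlgorithm algo = RegionSplitter.newSplitAlgoInstance(conf, "UniformSplit");
   * }</pre>
   *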
* @throws IOException if the specified SplitAlgorithm class couldn't be
* instantiated
*/
public static SplitAlgorithm newSplitAlgoInstance(Configuration conf,
String splitClassName) throws IOException {
    Class<?> splitClass;
// For split algorithms builtin to RegionSplitter, the user can specify
// their simple class name instead of a fully qualified class name.
if(splitClassName.equals(HexStringSplit.class.getSimpleName())) {
splitClass = HexStringSplit.class;
} else if (splitClassName.equals(DecimalStringSplit.class.getSimpleName())) {
splitClass = DecimalStringSplit.class;
} else if (splitClassName.equals(UniformSplit.class.getSimpleName())) {
splitClass = UniformSplit.class;
} else {
try {
splitClass = conf.getClassByName(splitClassName);
} catch (ClassNotFoundException e) {
throw new IOException("Couldn't load split class " + splitClassName, e);
}
if(splitClass == null) {
throw new IOException("Failed loading split class " + splitClassName);
}
if(!SplitAlgorithm.class.isAssignableFrom(splitClass)) {
throw new IOException(
"Specified split class doesn't implement SplitAlgorithm");
}
}
try {
return splitClass.asSubclass(SplitAlgorithm.class).newInstance();
} catch (Exception e) {
throw new IOException("Problem loading split algorithm: ", e);
}
}
  static LinkedList<Pair<byte[], byte[]>> splitScan(
      LinkedList<Pair<byte[], byte[]>> regionList,
final Connection connection,
final TableName tableName,
SplitAlgorithm splitAlgo)
throws IOException, InterruptedException {
    LinkedList<Pair<byte[], byte[]>> finished = Lists.newLinkedList();
    LinkedList<Pair<byte[], byte[]>> logicalSplitting = Lists.newLinkedList();
    LinkedList<Pair<byte[], byte[]>> physicalSplitting = Lists.newLinkedList();
// Get table info
    Pair<Path, Path> tableDirAndSplitFile =
getTableDirAndSplitFile(connection.getConfiguration(), tableName);
Path tableDir = tableDirAndSplitFile.getFirst();
FileSystem fs = tableDir.getFileSystem(connection.getConfiguration());
// Clear the cache to forcibly refresh region information
((ClusterConnection)connection).clearRegionCache();
HTableDescriptor htd = null;
try (Table table = connection.getTable(tableName)) {
htd = table.getTableDescriptor();
}
try (RegionLocator regionLocator = connection.getRegionLocator(tableName)) {
// for every region that hasn't been verified as a finished split
      for (Pair<byte[], byte[]> region : regionList) {
byte[] start = region.getFirst();
byte[] split = region.getSecond();
// see if the new split daughter region has come online
try {
HRegionInfo dri = regionLocator.getRegionLocation(split).getRegionInfo();
if (dri.isOffline() || !Bytes.equals(dri.getStartKey(), split)) {
logicalSplitting.add(region);
continue;
}
} catch (NoServerForRegionException nsfre) {
// NSFRE will occur if the old hbase:meta entry has no server assigned
LOG.info(nsfre);
logicalSplitting.add(region);
continue;
}
try {
// when a daughter region is opened, a compaction is triggered
// wait until compaction completes for both daughter regions
          LinkedList<HRegionInfo> check = Lists.newLinkedList();
check.add(regionLocator.getRegionLocation(start).getRegionInfo());
check.add(regionLocator.getRegionLocation(split).getRegionInfo());
for (HRegionInfo hri : check.toArray(new HRegionInfo[check.size()])) {
byte[] sk = hri.getStartKey();
if (sk.length == 0)
sk = splitAlgo.firstRow();
HRegionFileSystem regionFs = HRegionFileSystem.openRegionFromFileSystem(
connection.getConfiguration(), fs, tableDir, hri, true);
            // Check every Column Family for that region -- see whether any
            // still has reference files.
boolean refFound = false;
for (HColumnDescriptor c : htd.getFamilies()) {
if ((refFound = regionFs.hasReferences(c.getNameAsString()))) {
break;
}
}
// compaction is completed when all reference files are gone
if (!refFound) {
check.remove(hri);
}
}
if (check.isEmpty()) {
finished.add(region);
} else {
physicalSplitting.add(region);
}
} catch (NoServerForRegionException nsfre) {
LOG.debug("No Server Exception thrown for: " + splitAlgo.rowToStr(start));
physicalSplitting.add(region);
((ClusterConnection)connection).clearRegionCache();
}
}
LOG.debug("Split Scan: " + finished.size() + " finished / "
+ logicalSplitting.size() + " split wait / "
+ physicalSplitting.size() + " reference wait");
return finished;
}
}
/**
* @param conf
* @param tableName
* @return A Pair where first item is table dir and second is the split file.
* @throws IOException
*/
  private static Pair<Path, Path> getTableDirAndSplitFile(final Configuration conf,
final TableName tableName)
throws IOException {
Path hbDir = FSUtils.getRootDir(conf);
Path tableDir = FSUtils.getTableDir(hbDir, tableName);
Path splitFile = new Path(tableDir, "_balancedSplit");
    return new Pair<Path, Path>(tableDir, splitFile);
}
  static LinkedList<Pair<byte[], byte[]>> getSplits(final Connection connection,
TableName tableName, SplitAlgorithm splitAlgo)
throws IOException {
    Pair<Path, Path> tableDirAndSplitFile =
getTableDirAndSplitFile(connection.getConfiguration(), tableName);
Path tableDir = tableDirAndSplitFile.getFirst();
Path splitFile = tableDirAndSplitFile.getSecond();
FileSystem fs = tableDir.getFileSystem(connection.getConfiguration());
// Using strings because (new byte[]{0}).equals(new byte[]{0}) == false
    Set<Pair<String, String>> daughterRegions = Sets.newHashSet();
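    // The _balancedSplit file is a simple replay log: getSplits() writes one
    // "+ <start><separator><splitPoint>" record per planned daughter region,
    // and rollingSplit() appends a matching "- ..." record once that split has
    // been verified. Illustrative records, using HexStringSplit's space
    // separator:
    //   + 00000000 40000000   (split pending)
    //   - 00000000 40000000   (split complete)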
// Does a split file exist?
if (!fs.exists(splitFile)) {
// NO = fresh start. calculate splits to make
LOG.debug("No " + splitFile.getName() + " file. Calculating splits ");
// Query meta for all regions in the table
      Set<Pair<byte[], byte[]>> rows = Sets.newHashSet();
      Pair<byte[][], byte[][]> tmp = null;
try (RegionLocator regionLocator = connection.getRegionLocator(tableName)) {
tmp = regionLocator.getStartEndKeys();
}
      Preconditions.checkArgument(tmp.getFirst().length == tmp.getSecond().length,
          "Start and end key arrays must be the same length");
for (int i = 0; i < tmp.getFirst().length; ++i) {
byte[] start = tmp.getFirst()[i], end = tmp.getSecond()[i];
if (start.length == 0)
start = splitAlgo.firstRow();
if (end.length == 0)
end = splitAlgo.lastRow();
rows.add(Pair.newPair(start, end));
}
LOG.debug("Table " + tableName + " has " + rows.size() + " regions that will be split.");
// prepare the split file
Path tmpFile = new Path(tableDir, "_balancedSplit_prepare");
FSDataOutputStream tmpOut = fs.create(tmpFile);
// calculate all the splits == [daughterRegions] = [(start, splitPoint)]
      for (Pair<byte[], byte[]> r : rows) {
byte[] splitPoint = splitAlgo.split(r.getFirst(), r.getSecond());
String startStr = splitAlgo.rowToStr(r.getFirst());
String splitStr = splitAlgo.rowToStr(splitPoint);
daughterRegions.add(Pair.newPair(startStr, splitStr));
LOG.debug("Will Split [" + startStr + " , "
+ splitAlgo.rowToStr(r.getSecond()) + ") at " + splitStr);
tmpOut.writeChars("+ " + startStr + splitAlgo.separator() + splitStr
+ "\n");
}
tmpOut.close();
fs.rename(tmpFile, splitFile);
} else {
LOG.debug("_balancedSplit file found. Replay log to restore state...");
FSUtils.getInstance(fs, connection.getConfiguration())
.recoverFileLease(fs, splitFile, connection.getConfiguration(), null);
// parse split file and process remaining splits
FSDataInputStream tmpIn = fs.open(splitFile);
StringBuilder sb = new StringBuilder(tmpIn.available());
while (tmpIn.available() > 0) {
sb.append(tmpIn.readChar());
}
tmpIn.close();
for (String line : sb.toString().split("\n")) {
String[] cmd = line.split(splitAlgo.separator());
Preconditions.checkArgument(3 == cmd.length);
byte[] start = splitAlgo.strToRow(cmd[1]);
String startStr = splitAlgo.rowToStr(start);
byte[] splitPoint = splitAlgo.strToRow(cmd[2]);
String splitStr = splitAlgo.rowToStr(splitPoint);
        Pair<String, String> r = Pair.newPair(startStr, splitStr);
if (cmd[0].equals("+")) {
LOG.debug("Adding: " + r);
daughterRegions.add(r);
} else {
LOG.debug("Removing: " + r);
Preconditions.checkArgument(cmd[0].equals("-"),
"Unknown option: " + cmd[0]);
Preconditions.checkState(daughterRegions.contains(r),
"Missing row: " + r);
daughterRegions.remove(r);
}
}
LOG.debug("Done reading. " + daughterRegions.size() + " regions left.");
}
    LinkedList<Pair<byte[], byte[]>> ret = Lists.newLinkedList();
    for (Pair<String, String> r : daughterRegions) {
ret.add(Pair.newPair(splitAlgo.strToRow(r.getFirst()), splitAlgo
.strToRow(r.getSecond())));
}
return ret;
}
/**
* HexStringSplit is a well-known {@link SplitAlgorithm} for choosing region
* boundaries. The format of a HexStringSplit region boundary is the ASCII
* representation of an MD5 checksum, or any other uniformly distributed
  * hexadecimal value. Rows are hex-encoded long values in the range
* "00000000" => "FFFFFFFF" and are left-padded with zeros to keep the
* same order lexicographically as if they were binary.
*
* Since this split algorithm uses hex strings as keys, it is easy to read &
* write in the shell but takes up more space and may be non-intuitive.
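  *
  * <p>
  * For example, the split point chosen between two region boundaries is their
  * numeric midpoint:
  *
  * <pre>{@code
  * HexStringSplit algo = new HexStringSplit();
  * byte[] mid = algo.split(Bytes.toBytes("20000000"), Bytes.toBytes("60000000"));
  * // mid is the ASCII bytes of "40000000"
  * }</pre>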
*/
public static class HexStringSplit extends NumberStringSplit {
final static String DEFAULT_MIN_HEX = "00000000";
final static String DEFAULT_MAX_HEX = "FFFFFFFF";
final static int RADIX_HEX = 16;
public HexStringSplit() {
super(DEFAULT_MIN_HEX, DEFAULT_MAX_HEX, RADIX_HEX);
}
}
/**
* The format of a DecimalStringSplit region boundary is the ASCII representation of
* reversed sequential number, or any other uniformly distributed decimal value.
  * Rows are decimal-encoded long values in the range
* "00000000" => "99999999" and are left-padded with zeros to keep the
* same order lexicographically as if they were binary.
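  *
  * <p>
  * For example:
  *
  * <pre>{@code
  * byte[][] keys = new DecimalStringSplit().split(3);
  * // keys are the ASCII bytes of "33333333" and "66666666"
  * }</pre>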
*/
public static class DecimalStringSplit extends NumberStringSplit {
final static String DEFAULT_MIN_DEC = "00000000";
final static String DEFAULT_MAX_DEC = "99999999";
final static int RADIX_DEC = 10;
public DecimalStringSplit() {
super(DEFAULT_MIN_DEC, DEFAULT_MAX_DEC, RADIX_DEC);
}
}
public abstract static class NumberStringSplit implements SplitAlgorithm {
String firstRow;
BigInteger firstRowInt;
String lastRow;
BigInteger lastRowInt;
int rowComparisonLength;
int radix;
NumberStringSplit(String minRow, String maxRow, int radix) {
this.firstRow = minRow;
this.lastRow = maxRow;
this.radix = radix;
this.firstRowInt = BigInteger.ZERO;
this.lastRowInt = new BigInteger(lastRow, this.radix);
this.rowComparisonLength = lastRow.length();
}
public byte[] split(byte[] start, byte[] end) {
BigInteger s = convertToBigInteger(start);
BigInteger e = convertToBigInteger(end);
Preconditions.checkArgument(!e.equals(BigInteger.ZERO));
return convertToByte(split2(s, e));
}
public byte[][] split(int n) {
Preconditions.checkArgument(lastRowInt.compareTo(firstRowInt) > 0,
"last row (%s) is configured less than first row (%s)", lastRow,
firstRow);
// +1 to range because the last row is inclusive
BigInteger range = lastRowInt.subtract(firstRowInt).add(BigInteger.ONE);
Preconditions.checkState(range.compareTo(BigInteger.valueOf(n)) >= 0,
"split granularity (%s) is greater than the range (%s)", n, range);
BigInteger[] splits = new BigInteger[n - 1];
BigInteger sizeOfEachSplit = range.divide(BigInteger.valueOf(n));
for (int i = 1; i < n; i++) {
// NOTE: this means the last region gets all the slop.
// This is not a big deal if we're assuming n << MAXHEX
splits[i - 1] = firstRowInt.add(sizeOfEachSplit.multiply(BigInteger
.valueOf(i)));
}
return convertToBytes(splits);
}
@Override
public byte[][] split(byte[] start, byte[] end, int numSplits, boolean inclusive) {
BigInteger s = convertToBigInteger(start);
BigInteger e = convertToBigInteger(end);
      Preconditions.checkArgument(e.compareTo(s) > 0,
          "last row (%s) is configured less than first row (%s)",
          rowToStr(end), rowToStr(start));
// +1 to range because the last row is inclusive
BigInteger range = e.subtract(s).add(BigInteger.ONE);
Preconditions.checkState(range.compareTo(BigInteger.valueOf(numSplits)) >= 0,
"split granularity (%s) is greater than the range (%s)", numSplits, range);
BigInteger[] splits = new BigInteger[numSplits - 1];
BigInteger sizeOfEachSplit = range.divide(BigInteger.valueOf(numSplits));
for (int i = 1; i < numSplits; i++) {
// NOTE: this means the last region gets all the slop.
// This is not a big deal if we're assuming n << MAXHEX
splits[i - 1] = s.add(sizeOfEachSplit.multiply(BigInteger
.valueOf(i)));
}
if (inclusive) {
BigInteger[] inclusiveSplitPoints = new BigInteger[numSplits + 1];
inclusiveSplitPoints[0] = convertToBigInteger(start);
inclusiveSplitPoints[numSplits] = convertToBigInteger(end);
System.arraycopy(splits, 0, inclusiveSplitPoints, 1, splits.length);
return convertToBytes(inclusiveSplitPoints);
} else {
return convertToBytes(splits);
}
}
public byte[] firstRow() {
return convertToByte(firstRowInt);
}
public byte[] lastRow() {
return convertToByte(lastRowInt);
}
public void setFirstRow(String userInput) {
firstRow = userInput;
firstRowInt = new BigInteger(firstRow, radix);
}
public void setLastRow(String userInput) {
lastRow = userInput;
lastRowInt = new BigInteger(lastRow, radix);
// Precondition: lastRow > firstRow, so last's length is the greater
rowComparisonLength = lastRow.length();
}
public byte[] strToRow(String in) {
return convertToByte(new BigInteger(in, radix));
}
public String rowToStr(byte[] row) {
return Bytes.toStringBinary(row);
}
public String separator() {
return " ";
}
@Override
public void setFirstRow(byte[] userInput) {
firstRow = Bytes.toString(userInput);
}
@Override
public void setLastRow(byte[] userInput) {
lastRow = Bytes.toString(userInput);
}
/**
    * Returns the midpoint of two numbers (used by the split algorithm to
    * choose a split point).
*
* @param a number #1
* @param b number #2
* @return the midpoint of the 2 numbers
*/
public BigInteger split2(BigInteger a, BigInteger b) {
return a.add(b).divide(BigInteger.valueOf(2)).abs();
}
/**
* Returns an array of bytes corresponding to an array of BigIntegers
*
* @param bigIntegers numbers to convert
* @return bytes corresponding to the bigIntegers
*/
public byte[][] convertToBytes(BigInteger[] bigIntegers) {
byte[][] returnBytes = new byte[bigIntegers.length][];
for (int i = 0; i < bigIntegers.length; i++) {
returnBytes[i] = convertToByte(bigIntegers[i]);
}
return returnBytes;
}
/**
* Returns the bytes corresponding to the BigInteger
*
* @param bigInteger number to convert
* @param pad padding length
    * @return byte array corresponding to the input BigInteger
*/
public byte[] convertToByte(BigInteger bigInteger, int pad) {
String bigIntegerString = bigInteger.toString(radix);
bigIntegerString = StringUtils.leftPad(bigIntegerString, pad, '0');
return Bytes.toBytes(bigIntegerString);
}
/**
* Returns the bytes corresponding to the BigInteger
*
* @param bigInteger number to convert
* @return corresponding bytes
*/
public byte[] convertToByte(BigInteger bigInteger) {
return convertToByte(bigInteger, rowComparisonLength);
}
/**
* Returns the BigInteger represented by the byte array
*
* @param row byte array representing row
* @return the corresponding BigInteger
*/
public BigInteger convertToBigInteger(byte[] row) {
return (row.length > 0) ? new BigInteger(Bytes.toString(row), radix)
: BigInteger.ZERO;
}
@Override
public String toString() {
return this.getClass().getSimpleName() + " [" + rowToStr(firstRow())
+ "," + rowToStr(lastRow()) + "]";
}
}
/**
* A SplitAlgorithm that divides the space of possible keys evenly. Useful
* when the keys are approximately uniform random bytes (e.g. hashes). Rows
* are raw byte values in the range 00 => FF and are right-padded with
* zeros to keep the same memcmp() order. This is the natural algorithm to use
* for a byte[] environment and saves space, but is not necessarily the
* easiest for readability.
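  *
  * <p>
  * For example, the split point between two raw-byte boundaries is the
  * arithmetic midpoint of the byte values:
  *
  * <pre>{@code
  * byte[] mid = new UniformSplit().split(new byte[] { 0x10 }, new byte[] { 0x30 });
  * // mid == new byte[] { 0x20 }
  * }</pre>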
*/
public static class UniformSplit implements SplitAlgorithm {
static final byte xFF = (byte) 0xFF;
byte[] firstRowBytes = ArrayUtils.EMPTY_BYTE_ARRAY;
byte[] lastRowBytes =
new byte[] {xFF, xFF, xFF, xFF, xFF, xFF, xFF, xFF};
public byte[] split(byte[] start, byte[] end) {
return Bytes.split(start, end, 1)[1];
}
@Override
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value="NP_NULL_ON_SOME_PATH",
      justification="Preconditions checks ensure we are not going to dereference a null value")
public byte[][] split(int numRegions) {
Preconditions.checkArgument(
Bytes.compareTo(lastRowBytes, firstRowBytes) > 0,
"last row (%s) is configured less than first row (%s)",
Bytes.toStringBinary(lastRowBytes),
Bytes.toStringBinary(firstRowBytes));
byte[][] splits = Bytes.split(firstRowBytes, lastRowBytes, true,
numRegions - 1);
Preconditions.checkState(splits != null,
"Could not split region with given user input: " + this);
// remove endpoints, which are included in the splits list
return Arrays.copyOfRange(splits, 1, splits.length - 1);
}
public byte[][] split(byte[] start, byte[] end, int numSplits, boolean inclusive) {
if (Arrays.equals(start, HConstants.EMPTY_BYTE_ARRAY)) {
start = firstRowBytes;
}
if (Arrays.equals(end, HConstants.EMPTY_BYTE_ARRAY)) {
end = lastRowBytes;
}
Preconditions.checkArgument(
Bytes.compareTo(end, start) > 0,
"last row (%s) is configured less than first row (%s)",
Bytes.toStringBinary(end),
Bytes.toStringBinary(start));
byte[][] splits = Bytes.split(start, end, true,
numSplits - 1);
Preconditions.checkState(splits != null,
"Could not calculate input splits with given user input: " + this);
if (inclusive) {
return splits;
} else {
// remove endpoints, which are included in the splits list
return Arrays.copyOfRange(splits, 1, splits.length - 1);
}
}
@Override
public byte[] firstRow() {
return firstRowBytes;
}
@Override
public byte[] lastRow() {
return lastRowBytes;
}
@Override
public void setFirstRow(String userInput) {
firstRowBytes = Bytes.toBytesBinary(userInput);
}
@Override
public void setLastRow(String userInput) {
lastRowBytes = Bytes.toBytesBinary(userInput);
}
@Override
public void setFirstRow(byte[] userInput) {
firstRowBytes = userInput;
}
@Override
public void setLastRow(byte[] userInput) {
lastRowBytes = userInput;
}
@Override
public byte[] strToRow(String input) {
return Bytes.toBytesBinary(input);
}
@Override
public String rowToStr(byte[] row) {
return Bytes.toStringBinary(row);
}
@Override
public String separator() {
return ",";
}
@Override
public String toString() {
return this.getClass().getSimpleName() + " [" + rowToStr(firstRow())
+ "," + rowToStr(lastRow()) + "]";
}
}
}