All Downloads are FREE. Search and download functionality uses the official Maven repository.

org.apache.hadoop.hbase.util.RegionSplitCalculator Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.util;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.List;
import java.util.Map.Entry;
import java.util.TreeMap;
import java.util.TreeSet;
import org.apache.hadoop.hbase.util.Bytes.ByteArrayComparator;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.collect.ArrayListMultimap;
import org.apache.hbase.thirdparty.com.google.common.collect.Multimap;
import org.apache.hbase.thirdparty.com.google.common.collect.TreeMultimap;

/**
 * This is a generic region split calculator. It requires Ranges that provide start, end, and a
 * comparator. It works in two phases -- the first adds ranges and rejects backwards ranges. Then
 * one calls calcRegions to generate the multimap that has a start split key as a key and possibly
 * multiple Ranges as members. To traverse, one normally would get the split set, and iterate
 * through the calcRegions. Normal regions would have only one entry, holes would have zero, and any
 * overlaps would have multiple entries. The interface is a bit cumbersome currently but is exposed
 * this way so that clients can choose how to iterate through the region splits.
 * @param 
 */
@InterfaceAudience.Private
public class RegionSplitCalculator {
  private static final Logger LOG = LoggerFactory.getLogger(RegionSplitCalculator.class);

  private final Comparator rangeCmp;
  /**
   * This contains a sorted set of all the possible split points Invariant: once populated this has
   * 0 entries if empty or at most n+1 values where n == number of added ranges.
   */
  private final TreeSet splits = new TreeSet<>(BYTES_COMPARATOR);

  /**
   * This is a map from start key to regions with the same start key. Invariant: This always have n
   * values in total
   */
  private final Multimap starts = ArrayListMultimap.create();

  /**
   * SPECIAL CASE
   */
  private final static byte[] ENDKEY = null;

  public RegionSplitCalculator(Comparator cmp) {
    rangeCmp = cmp;
  }

  public final static Comparator BYTES_COMPARATOR = new ByteArrayComparator() {
    @Override
    public int compare(byte[] l, byte[] r) {
      if (l == null && r == null) return 0;
      if (l == null) return 1;
      if (r == null) return -1;
      return super.compare(l, r);
    }
  };

  /**
   * SPECIAL CASE wrapper for empty end key
   * @return ENDKEY if end key is empty, else normal endkey.
   */
  private static  byte[] specialEndKey(R range) {
    byte[] end = range.getEndKey();
    if (end.length == 0) {
      return ENDKEY;
    }
    return end;
  }

  /**
   * Adds an edge to the split calculator
   * @return true if is included, false if backwards/invalid
   */
  public boolean add(R range) {
    byte[] start = range.getStartKey();
    byte[] end = specialEndKey(range);

    // No need to use Arrays.equals because ENDKEY is null
    if (end != ENDKEY && Bytes.compareTo(start, end) > 0) {
      // don't allow backwards edges
      LOG.debug(
        "attempted to add backwards edge: " + Bytes.toString(start) + " " + Bytes.toString(end));
      return false;
    }

    splits.add(start);
    splits.add(end);
    starts.put(start, range);
    return true;
  }

  /**
   * Generates a coverage multimap from split key to Regions that start with the split key.
   * @return coverage multimap
   */
  public Multimap calcCoverage() {
    // This needs to be sorted to force the use of the comparator on the values,
    // otherwise byte array comparison isn't used
    Multimap regions = TreeMultimap.create(BYTES_COMPARATOR, rangeCmp);

    // march through all splits from the start points
    for (Entry> start : starts.asMap().entrySet()) {
      byte[] key = start.getKey();
      for (R r : start.getValue()) {
        regions.put(key, r);

        for (byte[] coveredSplit : splits.subSet(r.getStartKey(), specialEndKey(r))) {
          regions.put(coveredSplit, r);
        }
      }
    }
    return regions;
  }

  public TreeSet getSplits() {
    return splits;
  }

  public Multimap getStarts() {
    return starts;
  }

  /**
   * Find specified number of top ranges in a big overlap group. It could return less if there are
   * not that many top ranges. Once these top ranges are excluded, the big overlap group will be
   * broken into ranges with no overlapping, or smaller overlapped groups, and most likely some
   * holes.
   * @param bigOverlap a list of ranges that overlap with each other
   * @param count      the max number of ranges to find
   * @return a list of ranges that overlap with most others
   */
  public static  List findBigRanges(Collection bigOverlap, int count) {
    List bigRanges = new ArrayList<>();

    // The key is the count of overlaps,
    // The value is a list of ranges that have that many overlaps
    TreeMap> overlapRangeMap = new TreeMap<>();
    for (R r : bigOverlap) {
      // Calculates the # of overlaps for each region
      // and populates rangeOverlapMap
      byte[] startKey = r.getStartKey();
      byte[] endKey = specialEndKey(r);

      int overlappedRegions = 0;
      for (R rr : bigOverlap) {
        byte[] start = rr.getStartKey();
        byte[] end = specialEndKey(rr);

        if (
          BYTES_COMPARATOR.compare(startKey, end) < 0 && BYTES_COMPARATOR.compare(endKey, start) > 0
        ) {
          overlappedRegions++;
        }
      }

      // One region always overlaps with itself,
      // so overlappedRegions should be more than 1
      // for actual overlaps.
      if (overlappedRegions > 1) {
        Integer key = Integer.valueOf(overlappedRegions);
        List ranges = overlapRangeMap.get(key);
        if (ranges == null) {
          ranges = new ArrayList<>();
          overlapRangeMap.put(key, ranges);
        }
        ranges.add(r);
      }
    }
    int toBeAdded = count;
    for (Integer key : overlapRangeMap.descendingKeySet()) {
      List chunk = overlapRangeMap.get(key);
      int chunkSize = chunk.size();
      if (chunkSize <= toBeAdded) {
        bigRanges.addAll(chunk);
        toBeAdded -= chunkSize;
        if (toBeAdded > 0) continue;
      } else {
        // Try to use the middle chunk in case the overlapping is
        // chained, for example: [a, c), [b, e), [d, g), [f h)...
        // In such a case, sideline the middle chunk will break
        // the group efficiently.
        int start = (chunkSize - toBeAdded) / 2;
        int end = start + toBeAdded;
        for (int i = start; i < end; i++) {
          bigRanges.add(chunk.get(i));
        }
      }
      break;
    }
    return bigRanges;
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy