// org.apache.hadoop.hbase.util.RegionSplitCalculator Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.util;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.List;
import java.util.Map.Entry;
import java.util.TreeMap;
import java.util.TreeSet;
import org.apache.hadoop.hbase.util.Bytes.ByteArrayComparator;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hbase.thirdparty.com.google.common.collect.ArrayListMultimap;
import org.apache.hbase.thirdparty.com.google.common.collect.Multimap;
import org.apache.hbase.thirdparty.com.google.common.collect.TreeMultimap;
/**
* This is a generic region split calculator. It requires Ranges that provide start, end, and a
* comparator. It works in two phases -- the first adds ranges and rejects backwards ranges. Then
* one calls calcRegions to generate the multimap that has a start split key as a key and possibly
* multiple Ranges as members. To traverse, one normally would get the split set, and iterate
* through the calcRegions. Normal regions would have only one entry, holes would have zero, and any
* overlaps would have multiple entries. The interface is a bit cumbersome currently but is exposed
* this way so that clients can choose how to iterate through the region splits.
* @param
*/
@InterfaceAudience.Private
public class RegionSplitCalculator {
private static final Logger LOG = LoggerFactory.getLogger(RegionSplitCalculator.class);
private final Comparator rangeCmp;
/**
* This contains a sorted set of all the possible split points Invariant: once populated this has
* 0 entries if empty or at most n+1 values where n == number of added ranges.
*/
private final TreeSet splits = new TreeSet<>(BYTES_COMPARATOR);
/**
* This is a map from start key to regions with the same start key. Invariant: This always have n
* values in total
*/
private final Multimap starts = ArrayListMultimap.create();
/**
* SPECIAL CASE
*/
private final static byte[] ENDKEY = null;
public RegionSplitCalculator(Comparator cmp) {
rangeCmp = cmp;
}
public final static Comparator BYTES_COMPARATOR = new ByteArrayComparator() {
@Override
public int compare(byte[] l, byte[] r) {
if (l == null && r == null) return 0;
if (l == null) return 1;
if (r == null) return -1;
return super.compare(l, r);
}
};
/**
* SPECIAL CASE wrapper for empty end key
* @return ENDKEY if end key is empty, else normal endkey.
*/
private static byte[] specialEndKey(R range) {
byte[] end = range.getEndKey();
if (end.length == 0) {
return ENDKEY;
}
return end;
}
/**
* Adds an edge to the split calculator
* @return true if is included, false if backwards/invalid
*/
public boolean add(R range) {
byte[] start = range.getStartKey();
byte[] end = specialEndKey(range);
// No need to use Arrays.equals because ENDKEY is null
if (end != ENDKEY && Bytes.compareTo(start, end) > 0) {
// don't allow backwards edges
LOG.debug(
"attempted to add backwards edge: " + Bytes.toString(start) + " " + Bytes.toString(end));
return false;
}
splits.add(start);
splits.add(end);
starts.put(start, range);
return true;
}
/**
* Generates a coverage multimap from split key to Regions that start with the split key.
* @return coverage multimap
*/
public Multimap calcCoverage() {
// This needs to be sorted to force the use of the comparator on the values,
// otherwise byte array comparison isn't used
Multimap regions = TreeMultimap.create(BYTES_COMPARATOR, rangeCmp);
// march through all splits from the start points
for (Entry> start : starts.asMap().entrySet()) {
byte[] key = start.getKey();
for (R r : start.getValue()) {
regions.put(key, r);
for (byte[] coveredSplit : splits.subSet(r.getStartKey(), specialEndKey(r))) {
regions.put(coveredSplit, r);
}
}
}
return regions;
}
public TreeSet getSplits() {
return splits;
}
public Multimap getStarts() {
return starts;
}
/**
* Find specified number of top ranges in a big overlap group. It could return less if there are
* not that many top ranges. Once these top ranges are excluded, the big overlap group will be
* broken into ranges with no overlapping, or smaller overlapped groups, and most likely some
* holes.
* @param bigOverlap a list of ranges that overlap with each other
* @param count the max number of ranges to find
* @return a list of ranges that overlap with most others
*/
public static List findBigRanges(Collection bigOverlap, int count) {
List bigRanges = new ArrayList<>();
// The key is the count of overlaps,
// The value is a list of ranges that have that many overlaps
TreeMap> overlapRangeMap = new TreeMap<>();
for (R r : bigOverlap) {
// Calculates the # of overlaps for each region
// and populates rangeOverlapMap
byte[] startKey = r.getStartKey();
byte[] endKey = specialEndKey(r);
int overlappedRegions = 0;
for (R rr : bigOverlap) {
byte[] start = rr.getStartKey();
byte[] end = specialEndKey(rr);
if (
BYTES_COMPARATOR.compare(startKey, end) < 0 && BYTES_COMPARATOR.compare(endKey, start) > 0
) {
overlappedRegions++;
}
}
// One region always overlaps with itself,
// so overlappedRegions should be more than 1
// for actual overlaps.
if (overlappedRegions > 1) {
Integer key = Integer.valueOf(overlappedRegions);
List ranges = overlapRangeMap.get(key);
if (ranges == null) {
ranges = new ArrayList<>();
overlapRangeMap.put(key, ranges);
}
ranges.add(r);
}
}
int toBeAdded = count;
for (Integer key : overlapRangeMap.descendingKeySet()) {
List chunk = overlapRangeMap.get(key);
int chunkSize = chunk.size();
if (chunkSize <= toBeAdded) {
bigRanges.addAll(chunk);
toBeAdded -= chunkSize;
if (toBeAdded > 0) continue;
} else {
// Try to use the middle chunk in case the overlapping is
// chained, for example: [a, c), [b, e), [d, g), [f h)...
// In such a case, sideline the middle chunk will break
// the group efficiently.
int start = (chunkSize - toBeAdded) / 2;
int end = start + toBeAdded;
for (int i = start; i < end; i++) {
bigRanges.add(chunk.get(i));
}
}
break;
}
return bigRanges;
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy