
com.hydraql.adapter.util.RegionSplitter Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.hydraql.adapter.util;
import com.hydraql.common.lang.Preconditions;
import com.hydraql.common.util.StringUtil;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.util.Bytes;
import java.math.BigInteger;
import java.util.Arrays;
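/**
* Region split-key utilities: the {@link SplitAlgorithm} contract and the implementations
* bundled with it.
* @author leojie 2021/2/4 10:42 PM
*/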
public class RegionSplitter {
/**
* Contract for computing split keys over a row-key range, plus the conversions between row keys
* and their string form that the split code needs.
*/
public interface SplitAlgorithm {
/**
* Split a pre-existing region into 2 regions.
* @param start first row (inclusive)
* @param end last row (exclusive)
* @return the split row to use
*/
byte[] split(byte[] start, byte[] end);
/**
* Split an entire table.
* @param numRegions number of regions to split the table into
* @return array of split keys for the initial regions of the table. The length of the returned
* array should be numRegions-1.
* @throws RuntimeException user input is validated at this time, so a runtime exception may be
* thrown in response to a parse failure
*/
byte[][] split(int numRegions);
/**
* Some MapReduce jobs may want to run multiple mappers per region; this method is intended for
* that use case.
* @param start first row (inclusive)
* @param end last row (exclusive)
* @param numSplits number of splits to generate
* @param inclusive whether start and end are returned as split points
* @return the computed split points
*/
byte[][] split(byte[] start, byte[] end, int numSplits, boolean inclusive);
/**
* In HBase, the first row is represented by an empty byte array. This might cause problems with
* your split algorithm or row printing. All your APIs will be passed firstRow() instead of an
* empty array.
* @return your representation of your first row
*/
byte[] firstRow();
/**
* In HBase, the last row is represented by an empty byte array. This might cause problems with
* your split algorithm or row printing. All your APIs will be passed lastRow() instead of an
* empty array.
* @return your representation of your last row
*/
byte[] lastRow();
/**
* In HBase, the first row is represented by an empty byte array. Set this value to help the
* split code understand how to evenly divide the first region.
* @param userInput raw user input (may throw RuntimeException on parse failure)
*/
void setFirstRow(String userInput);
/**
* In HBase, the last row is represented by an empty byte array. Set this value to help the
* split code understand how to evenly divide the last region. Note that this last row is
* inclusive for all rows sharing the same prefix.
* @param userInput raw user input (may throw RuntimeException on parse failure)
*/
void setLastRow(String userInput);
/**
* Converts the string form of a row into its byte representation.
* @param input user or file input for row
* @return byte array representation of this row for HBase
*/
byte[] strToRow(String input);
/**
* Converts a row's bytes into its printable string form.
* @param row byte array representing a row in HBase
* @return String to use for debug and file printing
*/
String rowToStr(byte[] row);
/**
* @return the separator character to use when storing / printing the row
*/
String separator();
/**
* Set the first row.
* @param userInput byte array of the row key.
*/
void setFirstRow(byte[] userInput);
/**
* Set the last row.
* @param userInput byte array of the row key.
*/
void setLastRow(byte[] userInput);
}
/**
 * {@link NumberStringSplit} over hexadecimal row keys. The default key space runs from
 * {@code "00000000"} to {@code "FFFFFFFF"}.
 */
public static class HexStringSplit extends NumberStringSplit {
  /** Radix used to parse and print row keys. */
  static final int RADIX_HEX = 16;
  /** Default lowest row key. */
  static final String DEFAULT_MIN_HEX = "00000000";
  /** Default highest row key. */
  static final String DEFAULT_MAX_HEX = "FFFFFFFF";

  /** Creates a splitter covering the default hexadecimal key space. */
  public HexStringSplit() {
    super(DEFAULT_MIN_HEX, DEFAULT_MAX_HEX, RADIX_HEX);
  }
}
/**
 * {@link NumberStringSplit} over decimal row keys. The default key space runs from
 * {@code "00000000"} to {@code "99999999"}.
 */
public static class DecimalStringSplit extends NumberStringSplit {
  /** Radix used to parse and print row keys. */
  static final int RADIX_DEC = 10;
  /** Default lowest row key. */
  static final String DEFAULT_MIN_DEC = "00000000";
  /** Default highest row key. */
  static final String DEFAULT_MAX_DEC = "99999999";

  /** Creates a splitter covering the default decimal key space. */
  public DecimalStringSplit() {
    super(DEFAULT_MIN_DEC, DEFAULT_MAX_DEC, RADIX_DEC);
  }
}
/**
 * Base split algorithm for row keys that are numeric strings in a fixed radix. Rows are parsed
 * into {@link BigInteger} values, split arithmetically, and printed back as strings left-padded
 * to {@link #rowComparisonLength} characters.
 */
public abstract static class NumberStringSplit implements SplitAlgorithm {
  /** String form of the lowest row key. */
  String firstRow;
  /** Numeric value of {@link #firstRow}. */
  BigInteger firstRowInt;
  /** String form of the highest row key. */
  String lastRow;
  /** Numeric value of {@link #lastRow}. */
  BigInteger lastRowInt;
  /** Width, in characters, that generated row keys are padded to. */
  int rowComparisonLength;
  /** Radix used to parse and print row keys. */
  int radix;

  /**
   * @param minRow string form of the lowest row key, parsed in {@code radix}
   * @param maxRow string form of the highest row key, parsed in {@code radix}
   * @param radix radix used to parse and print row keys
   */
  NumberStringSplit(String minRow, String maxRow, int radix) {
    this.firstRow = minRow;
    this.lastRow = maxRow;
    this.radix = radix;
    // Parse minRow rather than assuming zero, so the numeric bound always matches firstRow.
    this.firstRowInt = new BigInteger(firstRow, this.radix);
    this.lastRowInt = new BigInteger(lastRow, this.radix);
    this.rowComparisonLength = lastRow.length();
  }

  /**
   * Returns the midpoint between two rows.
   * @param start first row (inclusive); an empty array is read as zero
   * @param end last row (exclusive); must not parse to zero
   * @return the padded string bytes of the midpoint
   */
  @Override
  public byte[] split(byte[] start, byte[] end) {
    BigInteger s = convertToBigInteger(start);
    BigInteger e = convertToBigInteger(end);
    Preconditions.checkArgument(!e.equals(BigInteger.ZERO));
    return convertToByte(split2(s, e));
  }

  /**
   * Computes {@code n - 1} evenly spaced split keys between the configured first and last row.
   * @param n number of regions to split the key space into
   * @return the split keys, in ascending order
   */
  @Override
  public byte[][] split(int n) {
    Preconditions.checkArgument(lastRowInt.compareTo(firstRowInt) > 0,
      "last row (%s) is configured less than first row (%s)", lastRow, firstRow);
    // +1 to range because the last row is inclusive
    BigInteger range = lastRowInt.subtract(firstRowInt).add(BigInteger.ONE);
    Preconditions.checkState(range.compareTo(BigInteger.valueOf(n)) >= 0,
      "split granularity (%s) is greater than the range (%s)", n, range);

    BigInteger[] splits = new BigInteger[n - 1];
    BigInteger sizeOfEachSplit = range.divide(BigInteger.valueOf(n));
    for (int i = 1; i < n; i++) {
      // NOTE: this means the last region gets all the slop.
      // This is not a big deal if we're assuming n << MAXHEX
      splits[i - 1] = firstRowInt.add(sizeOfEachSplit.multiply(BigInteger.valueOf(i)));
    }
    return convertToBytes(splits);
  }

  /**
   * Computes evenly spaced split points inside {@code [start, end]}.
   * @param start first row (inclusive); an empty array is read as zero
   * @param end last row; must parse to a value greater than {@code start}
   * @param numSplits number of pieces to divide the range into
   * @param inclusive when true, the start and end values are added as the first and last
   *          entries of the result
   * @return {@code numSplits - 1} split points, or {@code numSplits + 1} when inclusive
   */
  @Override
  public byte[][] split(byte[] start, byte[] end, int numSplits, boolean inclusive) {
    BigInteger s = convertToBigInteger(start);
    BigInteger e = convertToBigInteger(end);
    // The second message argument is the start row; it used to be the raw end array.
    Preconditions.checkArgument(e.compareTo(s) > 0,
      "last row (%s) is configured less than first row (%s)", rowToStr(end), rowToStr(start));
    // +1 to range because the last row is inclusive
    BigInteger range = e.subtract(s).add(BigInteger.ONE);
    Preconditions.checkState(range.compareTo(BigInteger.valueOf(numSplits)) >= 0,
      "split granularity (%s) is greater than the range (%s)", numSplits, range);

    BigInteger[] splits = new BigInteger[numSplits - 1];
    BigInteger sizeOfEachSplit = range.divide(BigInteger.valueOf(numSplits));
    for (int i = 1; i < numSplits; i++) {
      // NOTE: this means the last region gets all the slop.
      // This is not a big deal if we're assuming n << MAXHEX
      splits[i - 1] = s.add(sizeOfEachSplit.multiply(BigInteger.valueOf(i)));
    }

    if (!inclusive) {
      return convertToBytes(splits);
    }
    BigInteger[] inclusiveSplitPoints = new BigInteger[numSplits + 1];
    inclusiveSplitPoints[0] = s;
    inclusiveSplitPoints[numSplits] = e;
    System.arraycopy(splits, 0, inclusiveSplitPoints, 1, splits.length);
    return convertToBytes(inclusiveSplitPoints);
  }

  /** @return the padded string bytes of the configured first row */
  @Override
  public byte[] firstRow() {
    return convertToByte(firstRowInt);
  }

  /** @return the padded string bytes of the configured last row */
  @Override
  public byte[] lastRow() {
    return convertToByte(lastRowInt);
  }

  /**
   * Sets the first row from its string form.
   * @param userInput row key parsed in this splitter's radix
   * @throws NumberFormatException if {@code userInput} is not a valid number in that radix
   */
  @Override
  public void setFirstRow(String userInput) {
    firstRow = userInput;
    firstRowInt = new BigInteger(firstRow, radix);
  }

  /**
   * Sets the last row from its string form and adopts its length as the padding width.
   * @param userInput row key parsed in this splitter's radix
   * @throws NumberFormatException if {@code userInput} is not a valid number in that radix
   */
  @Override
  public void setLastRow(String userInput) {
    lastRow = userInput;
    lastRowInt = new BigInteger(lastRow, radix);
    // Precondition: lastRow > firstRow, so last's length is the greater
    rowComparisonLength = lastRow.length();
  }

  /**
   * Parses a row string and re-prints it in padded form.
   * @param in row key parsed in this splitter's radix
   * @return the padded string bytes of the row
   */
  @Override
  public byte[] strToRow(String in) {
    return convertToByte(new BigInteger(in, radix));
  }

  /**
   * @param row byte array representing a row
   * @return the result of {@code Bytes.toStringBinary(row)}
   */
  @Override
  public String rowToStr(byte[] row) {
    return Bytes.toStringBinary(row);
  }

  /** @return a single space */
  @Override
  public String separator() {
    return " ";
  }

  /**
   * Sets the first row from its byte form. For a non-empty input the numeric bound is updated
   * as well, so that {@link #split(int)} and {@link #firstRow()} see the new value. A null or
   * empty input only replaces the string form and leaves the numeric bound as it was.
   * @param userInput bytes of the row key string
   * @throws NumberFormatException if a non-empty input is not a valid number in this radix
   */
  @Override
  public void setFirstRow(byte[] userInput) {
    firstRow = Bytes.toString(userInput);
    // Previously only the string was stored, leaving firstRowInt stale.
    if (userInput != null && userInput.length > 0) {
      firstRowInt = new BigInteger(firstRow, radix);
    }
  }

  /**
   * Sets the last row from its byte form. For a non-empty input the numeric bound and the
   * padding width are updated as well, so that {@link #split(int)} and {@link #lastRow()} see
   * the new value. A null or empty input only replaces the string form.
   * @param userInput bytes of the row key string
   * @throws NumberFormatException if a non-empty input is not a valid number in this radix
   */
  @Override
  public void setLastRow(byte[] userInput) {
    lastRow = Bytes.toString(userInput);
    // Previously only the string was stored, leaving lastRowInt and the pad width stale.
    if (userInput != null && userInput.length > 0) {
      lastRowInt = new BigInteger(lastRow, radix);
      rowComparisonLength = lastRow.length();
    }
  }

  /**
   * Divide 2 numbers in half (for split algorithm)
   * @param a number #1
   * @param b number #2
   * @return the absolute value of {@code (a + b) / 2}
   */
  public BigInteger split2(BigInteger a, BigInteger b) {
    return a.add(b).divide(BigInteger.valueOf(2)).abs();
  }

  /**
   * Returns an array of bytes corresponding to an array of BigIntegers
   * @param bigIntegers numbers to convert
   * @return bytes corresponding to the bigIntegers
   */
  public byte[][] convertToBytes(BigInteger[] bigIntegers) {
    byte[][] returnBytes = new byte[bigIntegers.length][];
    for (int i = 0; i < bigIntegers.length; i++) {
      returnBytes[i] = convertToByte(bigIntegers[i]);
    }
    return returnBytes;
  }

  /**
   * Returns the bytes corresponding to the BigInteger
   * @param bigInteger number to convert
   * @param pad padding length passed to {@code StringUtil.leftPad} with {@code '0'} as filler
   * @return bytes of the number printed in this splitter's radix and padded
   */
  public byte[] convertToByte(BigInteger bigInteger, int pad) {
    String bigIntegerString = bigInteger.toString(radix);
    bigIntegerString = StringUtil.leftPad(bigIntegerString, pad, '0');
    return Bytes.toBytes(bigIntegerString);
  }

  /**
   * Returns the bytes corresponding to the BigInteger, padded to {@link #rowComparisonLength}.
   * @param bigInteger number to convert
   * @return corresponding bytes
   */
  public byte[] convertToByte(BigInteger bigInteger) {
    return convertToByte(bigInteger, rowComparisonLength);
  }

  /**
   * Returns the BigInteger represented by the byte array
   * @param row byte array representing row; an empty array yields zero
   * @return the corresponding BigInteger
   */
  public BigInteger convertToBigInteger(byte[] row) {
    return (row.length > 0) ? new BigInteger(Bytes.toString(row), radix) : BigInteger.ZERO;
  }

  /** @return the simple class name followed by the first and last row in brackets */
  @Override
  public String toString() {
    return this.getClass().getSimpleName() + " [" + rowToStr(firstRow()) + ","
      + rowToStr(lastRow()) + "]";
  }
}
/**
 * Split algorithm that works directly on raw row-key bytes and delegates the arithmetic to
 * {@code Bytes.split}. The default key space runs from the empty array to eight {@code 0xFF}
 * bytes.
 */
public static class UniformSplit implements SplitAlgorithm {
  static final byte xFF = (byte) 0xFF;
  /** Lowest row key; empty by default. */
  byte[] firstRowBytes = {};
  /** Highest row key; eight 0xFF bytes by default. */
  byte[] lastRowBytes = { xFF, xFF, xFF, xFF, xFF, xFF, xFF, xFF };

  /**
   * Splits a region in two.
   * @param start first row (inclusive)
   * @param end last row (exclusive)
   * @return element 1 of the array returned by {@code Bytes.split(start, end, 1)}
   */
  @Override
  public byte[] split(byte[] start, byte[] end) {
    byte[][] pieces = Bytes.split(start, end, 1);
    return pieces[1];
  }

  /**
   * Computes the split keys for a whole table between the configured first and last row.
   * @param numRegions number of regions to split the table into
   * @return the split keys with both endpoints removed
   */
  @Override
  public byte[][] split(int numRegions) {
    boolean ordered = Bytes.compareTo(lastRowBytes, firstRowBytes) > 0;
    Preconditions.checkArgument(ordered,
      "last row (%s) is configured less than first row (%s)",
      Bytes.toStringBinary(lastRowBytes), Bytes.toStringBinary(firstRowBytes));

    byte[][] withEndpoints = Bytes.split(firstRowBytes, lastRowBytes, true, numRegions - 1);
    Preconditions.checkState(withEndpoints != null,
      "Could not split region with given user input: " + this);
    if (withEndpoints == null) {
      return null;
    }
    // remove endpoints, which are included in the splits list
    return Arrays.copyOfRange(withEndpoints, 1, withEndpoints.length - 1);
  }

  /**
   * Computes split points inside a range. An empty {@code start} or {@code end} is replaced
   * by the configured first or last row respectively.
   * @param start first row (inclusive)
   * @param end last row (exclusive)
   * @param numSplits number of splits to generate
   * @param inclusive whether the endpoints are kept in the result
   * @return the split points, with or without the endpoints
   */
  @Override
  public byte[][] split(byte[] start, byte[] end, int numSplits, boolean inclusive) {
    byte[] lower = Arrays.equals(start, HConstants.EMPTY_BYTE_ARRAY) ? firstRowBytes : start;
    byte[] upper = Arrays.equals(end, HConstants.EMPTY_BYTE_ARRAY) ? lastRowBytes : end;

    boolean ordered = Bytes.compareTo(upper, lower) > 0;
    Preconditions.checkArgument(ordered,
      "last row (%s) is configured less than first row (%s)",
      Bytes.toStringBinary(upper), Bytes.toStringBinary(lower));

    byte[][] withEndpoints = Bytes.split(lower, upper, true, numSplits - 1);
    Preconditions.checkState(withEndpoints != null,
      "Could not calculate input splits with given user input: " + this);

    // when not inclusive, remove endpoints, which are included in the splits list
    return inclusive
      ? withEndpoints
      : Arrays.copyOfRange(withEndpoints, 1, withEndpoints.length - 1);
  }

  /** @return the configured first row bytes */
  @Override
  public byte[] firstRow() {
    return firstRowBytes;
  }

  /** @return the configured last row bytes */
  @Override
  public byte[] lastRow() {
    return lastRowBytes;
  }

  /**
   * Sets the first row from its printable form.
   * @param userInput row key string, decoded with {@code Bytes.toBytesBinary}
   */
  @Override
  public void setFirstRow(String userInput) {
    this.firstRowBytes = Bytes.toBytesBinary(userInput);
  }

  /**
   * Sets the last row from its printable form.
   * @param userInput row key string, decoded with {@code Bytes.toBytesBinary}
   */
  @Override
  public void setLastRow(String userInput) {
    this.lastRowBytes = Bytes.toBytesBinary(userInput);
  }

  /**
   * Sets the first row; the array is stored as given, without copying.
   * @param userInput byte array of the row key
   */
  @Override
  public void setFirstRow(byte[] userInput) {
    this.firstRowBytes = userInput;
  }

  /**
   * Sets the last row; the array is stored as given, without copying.
   * @param userInput byte array of the row key
   */
  @Override
  public void setLastRow(byte[] userInput) {
    this.lastRowBytes = userInput;
  }

  /**
   * @param input printable form of a row
   * @return the result of {@code Bytes.toBytesBinary(input)}
   */
  @Override
  public byte[] strToRow(String input) {
    return Bytes.toBytesBinary(input);
  }

  /**
   * @param row byte array representing a row
   * @return the result of {@code Bytes.toStringBinary(row)}
   */
  @Override
  public String rowToStr(byte[] row) {
    return Bytes.toStringBinary(row);
  }

  /** @return a comma */
  @Override
  public String separator() {
    return ",";
  }

  /** @return the simple class name followed by the first and last row in brackets */
  @Override
  public String toString() {
    String name = this.getClass().getSimpleName();
    return name + " [" + rowToStr(firstRow()) + "," + rowToStr(lastRow()) + "]";
  }
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy