// org.apache.omid.committable.hbase.RegionSplitter Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.omid.committable.hbase;
import org.apache.phoenix.thirdparty.com.google.common.base.Preconditions;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.util.Bytes;
import java.io.IOException;
import java.util.Arrays;
/**
* This class contains only the required behavior of the original
* org.apache.hadoop.hbase.util.RegionSplitter class to avoid
* having a reference to hbase-testing-util, which transitively
* imports hbase-server causing dependency conflicts for this module.
*/
public class RegionSplitter {
/**
* A generic interface for the RegionSplitter code to use for all its functionality. The authors of the original
* HBase class partitioned their tables with {@code org.apache.hadoop.hbase.util.HexStringSplit}, which is not
* part of this trimmed-down copy; this interface is the extension point for a custom algorithm. To use one,
* create a class implementing this interface and pass its name as the {@code splitClassName} argument of
* {@link RegionSplitter#newSplitAlgoInstance(Configuration, String)}. The implementation needs a no-argument
* constructor, because that is how the factory method instantiates it.
*/
public interface SplitAlgorithm {
/**
* Split a pre-existing region into 2 regions.
*
* @param start
* first row (inclusive)
* @param end
* last row (exclusive)
* @return the split row to use
*/
byte[] split(byte[] start, byte[] end);
/**
* Split an entire table.
*
* @param numRegions
* number of regions to split the table into
*
* @throws RuntimeException
* user input is validated at this time, so a runtime exception may be thrown in response to a
* parse failure
* @return array of split keys for the initial regions of the table. The length of the returned array should be
* numRegions-1.
*/
byte[][] split(int numRegions);
/**
* In HBase, the first row is represented by an empty byte array. This might cause problems with your split
* algorithm or row printing. All your APIs will be passed firstRow() instead of the empty array.
*
* @return your representation of your first row
*/
byte[] firstRow();
/**
* In HBase, the last row is represented by an empty byte array. This might cause problems with your split
* algorithm or row printing. All your APIs will be passed lastRow() instead of the empty array.
*
* @return your representation of your last row
*/
byte[] lastRow();
/**
* In HBase, the first row is represented by an empty byte array. Set this value to help the split code
* understand how to evenly divide the first region.
*
* @param userInput
* raw user input (may throw RuntimeException on parse failure)
*/
void setFirstRow(String userInput);
/**
* In HBase, the last row is represented by an empty byte array. Set this value to help the split code
* understand how to evenly divide the last region. Note that this last row is inclusive for all rows sharing
* the same prefix.
*
* @param userInput
* raw user input (may throw RuntimeException on parse failure)
*/
void setLastRow(String userInput);
/**
* Converts the textual form of a row into its HBase row key.
*
* @param input
* user or file input for row
* @return byte array representation of this row for HBase
*/
byte[] strToRow(String input);
/**
* Converts an HBase row key into its textual form.
*
* @param row
* byte array representing a row in HBase
* @return String to use for debug and file printing
*/
String rowToStr(byte[] row);
/**
* @return the separator character to use when storing / printing the row
*/
String separator();
/**
* Set the first row from an already-encoded row key.
*
* @param userInput
* byte array of the row key.
*/
void setFirstRow(byte[] userInput);
/**
* Set the last row from an already-encoded row key.
*
* @param userInput
* byte array of the row key.
*/
void setLastRow(byte[] userInput);
}
/**
* @param conf Hbase conf
* @param splitClassName split class name to be used
* @return an instance of SplitAlgorithm
* @throws IOException if the specified SplitAlgorithm class couldn't be instantiated
*/
public static SplitAlgorithm newSplitAlgoInstance(Configuration conf,
String splitClassName) throws IOException {
Class> splitClass;
// For split algorithms builtin to RegionSplitter, the user can specify
// their simple class name instead of a fully qualified class name.
if (splitClassName.equals(UniformSplit.class.getSimpleName())) {
splitClass = UniformSplit.class;
} else {
try {
splitClass = conf.getClassByName(splitClassName);
} catch (ClassNotFoundException e) {
throw new IOException("Couldn't load split class " + splitClassName, e);
}
if (splitClass == null) {
throw new IOException("Failed loading split class " + splitClassName);
}
if (!SplitAlgorithm.class.isAssignableFrom(splitClass)) {
throw new IOException(
"Specified split class doesn't implement SplitAlgorithm");
}
}
try {
return splitClass.asSubclass(SplitAlgorithm.class).getDeclaredConstructor().newInstance();
} catch (Exception e) {
throw new IOException("Problem loading split algorithm: ", e);
}
}
/**
 * A SplitAlgorithm that divides the space of possible keys evenly. Useful when the keys are approximately uniform
 * random bytes (e.g. hashes). Rows are raw byte values in the range [00..FF] and are right-padded with zeros
 * to keep the same memcmp() order. This is the natural algorithm to use for a byte[] environment and saves space,
 * but is not necessarily the easiest for readability.
 *
 * <p>Until changed through the setters, the first row is the empty byte array and the last row is
 * eight {@code 0xFF} bytes.
 */
public static class UniformSplit implements SplitAlgorithm {
    static final byte xFF = (byte) 0xFF;
    /** Lower bound of the key space; defaults to the empty byte array. */
    byte[] firstRowBytes = ArrayUtils.EMPTY_BYTE_ARRAY;
    /** Upper bound of the key space; defaults to eight 0xFF bytes. */
    byte[] lastRowBytes =
            new byte[]{xFF, xFF, xFF, xFF, xFF, xFF, xFF, xFF};

    /**
     * Computes the row at which the region {@code [start, end)} should be split in two.
     *
     * @param start first row (inclusive)
     * @param end   last row (exclusive)
     * @return the split row to use
     */
    @Override
    public byte[] split(byte[] start, byte[] end) {
        // Ask for a single split point; index 1 is the element following the start key
        // in the array returned by Bytes.split.
        return Bytes.split(start, end, 1)[1];
    }

    /**
     * Computes the split keys that divide the configured key range into {@code numRegions} regions.
     *
     * @param numRegions number of regions to split the table into
     * @return the split keys, without the first and last row themselves
     * @throws IllegalArgumentException if the configured last row is not greater than the first row
     * @throws IllegalStateException    if the range could not be split
     */
    @Override
    public byte[][] split(int numRegions) {
        Preconditions.checkArgument(
                Bytes.compareTo(lastRowBytes, firstRowBytes) > 0,
                "last row (%s) is configured less than first row (%s)",
                Bytes.toStringBinary(lastRowBytes),
                Bytes.toStringBinary(firstRowBytes));
        byte[][] splits = Bytes.split(firstRowBytes, lastRowBytes, true,
                numRegions - 1);
        // Template form defers building the message (and calling toString()) to the failure case.
        Preconditions.checkState(splits != null,
                "Could not split region with given user input: %s", this);
        // remove endpoints, which are included in the splits list
        return Arrays.copyOfRange(splits, 1, splits.length - 1);
    }

    /**
     * @return the currently configured first row (the internal array, not a copy)
     */
    @Override
    public byte[] firstRow() {
        return firstRowBytes;
    }

    /**
     * @return the currently configured last row (the internal array, not a copy)
     */
    @Override
    public byte[] lastRow() {
        return lastRowBytes;
    }

    /**
     * Sets the first row from its textual form, decoded with {@code Bytes.toBytesBinary}.
     *
     * @param userInput raw user input
     */
    @Override
    public void setFirstRow(String userInput) {
        firstRowBytes = Bytes.toBytesBinary(userInput);
    }

    /**
     * Sets the last row from its textual form, decoded with {@code Bytes.toBytesBinary}.
     *
     * @param userInput raw user input
     */
    @Override
    public void setLastRow(String userInput) {
        lastRowBytes = Bytes.toBytesBinary(userInput);
    }

    /**
     * Sets the first row. The array is stored as-is, without copying.
     *
     * @param userInput byte array of the row key
     */
    @Override
    public void setFirstRow(byte[] userInput) {
        firstRowBytes = userInput;
    }

    /**
     * Sets the last row. The array is stored as-is, without copying.
     *
     * @param userInput byte array of the row key
     */
    @Override
    public void setLastRow(byte[] userInput) {
        lastRowBytes = userInput;
    }

    /**
     * @param input user or file input for row
     * @return the row key decoded with {@code Bytes.toBytesBinary}
     */
    @Override
    public byte[] strToRow(String input) {
        return Bytes.toBytesBinary(input);
    }

    /**
     * @param row byte array representing a row in HBase
     * @return the row rendered with {@code Bytes.toStringBinary}
     */
    @Override
    public String rowToStr(byte[] row) {
        return Bytes.toStringBinary(row);
    }

    /**
     * @return the separator to use when storing / printing rows, a comma
     */
    @Override
    public String separator() {
        return ",";
    }

    /**
     * @return the simple class name followed by the first and last row in brackets
     */
    @Override
    public String toString() {
        return this.getClass().getSimpleName() + " [" + rowToStr(firstRow())
                + "," + rowToStr(lastRow()) + "]";
    }
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy