org.apache.hadoop.hbase.CellComparator Maven / Gradle / Ivy
The newest version!
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase;
import java.io.Serializable;
import java.util.Comparator;
import org.apache.hadoop.hbase.KeyValue.Type;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.hbase.util.Bytes;
import com.google.common.primitives.Longs;
/**
* Compare two HBase cells. Do not use this method comparing -ROOT-
or
* hbase:meta
cells. Cells from these tables need a specialized comparator, one that
* takes account of the special formatting of the row where we have commas to delimit table from
* regionname, from row. See KeyValue for how it has a special comparator to do hbase:meta cells
* and yet another for -ROOT-.
*/
@edu.umd.cs.findbugs.annotations.SuppressWarnings(
value="UNKNOWN",
justification="Findbugs doesn't like the way we are negating the result of a compare in below")
@InterfaceAudience.Private
@InterfaceStability.Evolving
public class CellComparator implements Comparator, Serializable {
private static final long serialVersionUID = -8760041766259623329L;
@Override
public int compare(Cell a, Cell b) {
return compare(a, b, false);
}
/**
* Compare cells.
* TODO: Replace with dynamic rather than static comparator so can change comparator
* implementation.
* @param a
* @param b
* @param ignoreSequenceid True if we are to compare the key portion only and ignore
* the sequenceid. Set to false to compare key and consider sequenceid.
* @return 0 if equal, -1 if a < b, and +1 if a > b.
*/
public static int compare(final Cell a, final Cell b, boolean ignoreSequenceid) {
// row
int c = compareRows(a, b);
if (c != 0) return c;
c = compareWithoutRow(a, b);
if(c != 0) return c;
if (!ignoreSequenceid) {
// Negate following comparisons so later edits show up first
// mvccVersion: later sorts first
return Longs.compare(b.getMvccVersion(), a.getMvccVersion());
} else {
return c;
}
}
public static int findCommonPrefixInRowPart(Cell left, Cell right, int rowCommonPrefix) {
return findCommonPrefix(left.getRowArray(), right.getRowArray(), left.getRowLength()
- rowCommonPrefix, right.getRowLength() - rowCommonPrefix, left.getRowOffset()
+ rowCommonPrefix, right.getRowOffset() + rowCommonPrefix);
}
private static int findCommonPrefix(byte[] left, byte[] right, int leftLength, int rightLength,
int leftOffset, int rightOffset) {
int length = Math.min(leftLength, rightLength);
int result = 0;
while (result < length && left[leftOffset + result] == right[rightOffset + result]) {
result++;
}
return result;
}
public static int findCommonPrefixInFamilyPart(Cell left, Cell right, int familyCommonPrefix) {
return findCommonPrefix(left.getFamilyArray(), right.getFamilyArray(), left.getFamilyLength()
- familyCommonPrefix, right.getFamilyLength() - familyCommonPrefix, left.getFamilyOffset()
+ familyCommonPrefix, right.getFamilyOffset() + familyCommonPrefix);
}
public static int findCommonPrefixInQualifierPart(Cell left, Cell right,
int qualifierCommonPrefix) {
return findCommonPrefix(left.getQualifierArray(), right.getQualifierArray(),
left.getQualifierLength() - qualifierCommonPrefix, right.getQualifierLength()
- qualifierCommonPrefix, left.getQualifierOffset() + qualifierCommonPrefix,
right.getQualifierOffset() + qualifierCommonPrefix);
}
/**************** equals ****************************/
public static boolean equals(Cell a, Cell b){
return equalsRow(a, b)
&& equalsFamily(a, b)
&& equalsQualifier(a, b)
&& equalsTimestamp(a, b)
&& equalsType(a, b);
}
public static boolean equalsRow(Cell a, Cell b){
return Bytes.equals(
a.getRowArray(), a.getRowOffset(), a.getRowLength(),
b.getRowArray(), b.getRowOffset(), b.getRowLength());
}
public static boolean equalsFamily(Cell a, Cell b){
return Bytes.equals(
a.getFamilyArray(), a.getFamilyOffset(), a.getFamilyLength(),
b.getFamilyArray(), b.getFamilyOffset(), b.getFamilyLength());
}
public static boolean equalsQualifier(Cell a, Cell b){
return Bytes.equals(
a.getQualifierArray(), a.getQualifierOffset(), a.getQualifierLength(),
b.getQualifierArray(), b.getQualifierOffset(), b.getQualifierLength());
}
public static boolean equalsTimestamp(Cell a, Cell b){
return a.getTimestamp() == b.getTimestamp();
}
public static boolean equalsType(Cell a, Cell b){
return a.getTypeByte() == b.getTypeByte();
}
public static int compareColumns(final Cell left, final Cell right) {
int lfoffset = left.getFamilyOffset();
int rfoffset = right.getFamilyOffset();
int lclength = left.getQualifierLength();
int rclength = right.getQualifierLength();
int lfamilylength = left.getFamilyLength();
int rfamilylength = right.getFamilyLength();
int diff = compare(left.getFamilyArray(), lfoffset, lfamilylength, right.getFamilyArray(),
rfoffset, rfamilylength);
if (diff != 0) {
return diff;
} else {
return compare(left.getQualifierArray(), left.getQualifierOffset(), lclength,
right.getQualifierArray(), right.getQualifierOffset(), rclength);
}
}
public static int compareFamilies(Cell left, Cell right) {
return Bytes.compareTo(left.getFamilyArray(), left.getFamilyOffset(), left.getFamilyLength(),
right.getFamilyArray(), right.getFamilyOffset(), right.getFamilyLength());
}
public static int compareQualifiers(Cell left, Cell right) {
return Bytes.compareTo(left.getQualifierArray(), left.getQualifierOffset(),
left.getQualifierLength(), right.getQualifierArray(), right.getQualifierOffset(),
right.getQualifierLength());
}
public int compareFlatKey(Cell left, Cell right) {
int compare = compareRows(left, right);
if (compare != 0) {
return compare;
}
return compareWithoutRow(left, right);
}
/**
* Do not use comparing rows from hbase:meta. Meta table Cells have schema (table,startrow,hash)
* so can't be treated as plain byte arrays as this method does.
*/
public static int compareRows(final Cell left, final Cell right) {
return Bytes.compareTo(left.getRowArray(), left.getRowOffset(), left.getRowLength(),
right.getRowArray(), right.getRowOffset(), right.getRowLength());
}
/**
* Do not use comparing rows from hbase:meta. Meta table Cells have schema (table,startrow,hash)
* so can't be treated as plain byte arrays as this method does.
*/
public static int compareRows(byte[] left, int loffset, int llength, byte[] right, int roffset,
int rlength) {
return Bytes.compareTo(left, loffset, llength, right, roffset, rlength);
}
public static int compareWithoutRow(final Cell leftCell, final Cell rightCell) {
// If the column is not specified, the "minimum" key type appears the
// latest in the sorted order, regardless of the timestamp. This is used
// for specifying the last key/value in a given row, because there is no
// "lexicographically last column" (it would be infinitely long). The
// "maximum" key type does not need this behavior.
// Copied from KeyValue. This is bad in that we can't do memcmp w/ special rules like this.
// TODO
if (leftCell.getFamilyLength() + leftCell.getQualifierLength() == 0
&& leftCell.getTypeByte() == Type.Minimum.getCode()) {
// left is "bigger", i.e. it appears later in the sorted order
return 1;
}
if (rightCell.getFamilyLength() + rightCell.getQualifierLength() == 0
&& rightCell.getTypeByte() == Type.Minimum.getCode()) {
return -1;
}
boolean sameFamilySize = (leftCell.getFamilyLength() == rightCell.getFamilyLength());
if (!sameFamilySize) {
// comparing column family is enough.
return Bytes.compareTo(leftCell.getFamilyArray(), leftCell.getFamilyOffset(),
leftCell.getFamilyLength(), rightCell.getFamilyArray(), rightCell.getFamilyOffset(),
rightCell.getFamilyLength());
}
int diff = compareColumns(leftCell, rightCell);
if (diff != 0) return diff;
diff = compareTimestamps(leftCell, rightCell);
if (diff != 0) return diff;
// Compare types. Let the delete types sort ahead of puts; i.e. types
// of higher numbers sort before those of lesser numbers. Maximum (255)
// appears ahead of everything, and minimum (0) appears after
// everything.
return (0xff & rightCell.getTypeByte()) - (0xff & leftCell.getTypeByte());
}
/**
* Compares cell's timestamps in DESCENDING order.
* The below older timestamps sorting ahead of newer timestamps looks
* wrong but it is intentional. This way, newer timestamps are first
* found when we iterate over a memstore and newer versions are the
* first we trip over when reading from a store file.
* @return 1 if left's timestamp < right's timestamp
* -1 if left's timestamp > right's timestamp
* 0 if both timestamps are equal
*/
public static int compareTimestamps(final Cell left, final Cell right) {
return compareTimestamps(left.getTimestamp(), right.getTimestamp());
}
/********************* hashCode ************************/
/**
* Returns a hash code that is always the same for two Cells having a matching equals(..) result.
*/
public static int hashCode(Cell cell){
if (cell == null) {// return 0 for empty Cell
return 0;
}
int hash = calculateHashForKeyValue(cell);
hash = 31 * hash + (int)cell.getMvccVersion();
return hash;
}
/**
* Returns a hash code that is always the same for two Cells having a matching
* equals(..) result. Note : Ignore mvcc while calculating the hashcode
*
* @param cell
* @return hashCode
*/
public static int hashCodeIgnoreMvcc(Cell cell) {
if (cell == null) {// return 0 for empty Cell
return 0;
}
int hash = calculateHashForKeyValue(cell);
return hash;
}
private static int calculateHashForKeyValue(Cell cell) {
//pre-calculate the 3 hashes made of byte ranges
int rowHash = Bytes.hashCode(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength());
int familyHash =
Bytes.hashCode(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength());
int qualifierHash = Bytes.hashCode(cell.getQualifierArray(), cell.getQualifierOffset(),
cell.getQualifierLength());
//combine the 6 sub-hashes
int hash = 31 * rowHash + familyHash;
hash = 31 * hash + qualifierHash;
hash = 31 * hash + (int)cell.getTimestamp();
hash = 31 * hash + cell.getTypeByte();
return hash;
}
/******************** lengths *************************/
public static boolean areKeyLengthsEqual(Cell a, Cell b) {
return a.getRowLength() == b.getRowLength()
&& a.getFamilyLength() == b.getFamilyLength()
&& a.getQualifierLength() == b.getQualifierLength();
}
public static boolean areRowLengthsEqual(Cell a, Cell b) {
return a.getRowLength() == b.getRowLength();
}
/*********************common prefixes*************************/
private static int compare(byte[] left, int leftOffset, int leftLength, byte[] right,
int rightOffset, int rightLength) {
return Bytes.compareTo(left, leftOffset, leftLength, right, rightOffset, rightLength);
}
public static int compareCommonRowPrefix(Cell left, Cell right, int rowCommonPrefix) {
return compare(left.getRowArray(), left.getRowOffset() + rowCommonPrefix, left.getRowLength()
- rowCommonPrefix, right.getRowArray(), right.getRowOffset() + rowCommonPrefix,
right.getRowLength() - rowCommonPrefix);
}
public static int compareCommonFamilyPrefix(Cell left, Cell right,
int familyCommonPrefix) {
return compare(left.getFamilyArray(), left.getFamilyOffset() + familyCommonPrefix,
left.getFamilyLength() - familyCommonPrefix, right.getFamilyArray(),
right.getFamilyOffset() + familyCommonPrefix,
right.getFamilyLength() - familyCommonPrefix);
}
public static int compareCommonQualifierPrefix(Cell left, Cell right,
int qualCommonPrefix) {
return compare(left.getQualifierArray(), left.getQualifierOffset() + qualCommonPrefix,
left.getQualifierLength() - qualCommonPrefix, right.getQualifierArray(),
right.getQualifierOffset() + qualCommonPrefix, right.getQualifierLength()
- qualCommonPrefix);
}
/***************** special cases ****************************/
/**
* special case for KeyValue.equals
*/
public static boolean equalsIgnoreMvccVersion(Cell a, Cell b){
return 0 == compareStaticIgnoreMvccVersion(a, b);
}
private static int compareStaticIgnoreMvccVersion(Cell a, Cell b) {
// row
int c = compareRows(a, b);
if (c != 0) return c;
// family
c = compareColumns(a, b);
if (c != 0) return c;
// timestamp: later sorts first
c = compareTimestamps(a, b);
if (c != 0) return c;
//type
c = (0xff & b.getTypeByte()) - (0xff & a.getTypeByte());
return c;
}
/**
* Compares timestamps in DESCENDING order.
* The below older timestamps sorting ahead of newer timestamps looks
* wrong but it is intentional. This way, newer timestamps are first
* found when we iterate over a memstore and newer versions are the
* first we trip over when reading from a store file.
* @return 1 if left timestamp < right timestamp
* -1 if left timestamp > right timestamp
* 0 if both timestamps are equal
*/
private static int compareTimestamps(final long ltimestamp, final long rtimestamp) {
if (ltimestamp < rtimestamp) {
return 1;
} else if (ltimestamp > rtimestamp) {
return -1;
}
return 0;
}
/**
* Counter part for the KeyValue.RowOnlyComparator
*/
public static class RowComparator extends CellComparator {
@Override
public int compare(Cell a, Cell b) {
return compareRows(a, b);
}
}
/**
* Try to return a Cell that falls between left and right but that is
* shorter; i.e. takes up less space. This trick is used building HFile block index.
* Its an optimization. It does not always work. In this case we'll just return the
* right cell.
* @param comparator Comparator to use.
* @param left
* @param right
* @return A cell that sorts between left and right .
*/
public static Cell getMidpoint(final KeyValue.KVComparator comparator, final Cell left,
final Cell right) {
// TODO: Redo so only a single pass over the arrays rather than one to compare and then a
// second composing midpoint.
if (right == null) {
throw new IllegalArgumentException("right cell can not be null");
}
if (left == null) {
return right;
}
// If Cells from meta table, don't mess around. meta table Cells have schema
// (table,startrow,hash) so can't be treated as plain byte arrays. Just skip out without
// trying to do this optimization.
if (comparator != null && comparator instanceof KeyValue.MetaComparator) {
return right;
}
int diff = compareRows(left, right);
if (diff > 0) {
throw new IllegalArgumentException("Left row sorts after right row; left=" +
CellUtil.getCellKeyAsString(left) + ", right=" + CellUtil.getCellKeyAsString(right));
}
if (diff < 0) {
// Left row is < right row.
byte [] midRow = getMinimumMidpointArray(left.getRowArray(), left.getRowOffset(),
left.getRowLength(),
right.getRowArray(), right.getRowOffset(), right.getRowLength());
// If midRow is null, just return 'right'. Can't do optimization.
if (midRow == null) return right;
return CellUtil.createCell(midRow);
}
// Rows are same. Compare on families.
diff = compareFamilies(left, right);
if (diff > 0) {
throw new IllegalArgumentException("Left family sorts after right family; left=" +
CellUtil.getCellKeyAsString(left) + ", right=" + CellUtil.getCellKeyAsString(right));
}
if (diff < 0) {
byte [] midRow = getMinimumMidpointArray(left.getFamilyArray(), left.getFamilyOffset(),
left.getFamilyLength(),
right.getFamilyArray(), right.getFamilyOffset(), right.getFamilyLength());
// If midRow is null, just return 'right'. Can't do optimization.
if (midRow == null) return right;
// Return new Cell where we use right row and then a mid sort family.
return CellUtil.createCell(right.getRowArray(), right.getRowOffset(), right.getRowLength(),
midRow, 0, midRow.length, HConstants.EMPTY_BYTE_ARRAY, 0,
HConstants.EMPTY_BYTE_ARRAY.length);
}
// Families are same. Compare on qualifiers.
diff = compareQualifiers(left, right);
if (diff > 0) {
throw new IllegalArgumentException("Left qualifier sorts after right qualifier; left=" +
CellUtil.getCellKeyAsString(left) + ", right=" + CellUtil.getCellKeyAsString(right));
}
if (diff < 0) {
byte [] midRow = getMinimumMidpointArray(left.getQualifierArray(), left.getQualifierOffset(),
left.getQualifierLength(),
right.getQualifierArray(), right.getQualifierOffset(), right.getQualifierLength());
// If midRow is null, just return 'right'. Can't do optimization.
if (midRow == null) return right;
// Return new Cell where we use right row and family and then a mid sort qualifier.
return CellUtil.createCell(right.getRowArray(), right.getRowOffset(), right.getRowLength(),
right.getFamilyArray(), right.getFamilyOffset(), right.getFamilyLength(),
midRow, 0, midRow.length);
}
// No opportunity for optimization. Just return right key.
return right;
}
/**
* @param leftArray
* @param leftOffset
* @param leftLength
* @param rightArray
* @param rightOffset
* @param rightLength
* @return Return a new array that is between left and right and minimally sized else just return
* null as indicator that we could not create a mid point.
*/
private static byte [] getMinimumMidpointArray(final byte [] leftArray, final int leftOffset,
final int leftLength,
final byte [] rightArray, final int rightOffset, final int rightLength) {
// rows are different
int minLength = leftLength < rightLength ? leftLength : rightLength;
int diffIdx = 0;
while (diffIdx < minLength &&
leftArray[leftOffset + diffIdx] == rightArray[rightOffset + diffIdx]) {
diffIdx++;
}
byte [] minimumMidpointArray = null;
if (diffIdx >= minLength) {
// leftKey's row is prefix of rightKey's.
minimumMidpointArray = new byte[diffIdx + 1];
System.arraycopy(rightArray, rightOffset, minimumMidpointArray, 0, diffIdx + 1);
} else {
int diffByte = leftArray[leftOffset + diffIdx];
if ((0xff & diffByte) < 0xff && (diffByte + 1) < (rightArray[rightOffset + diffIdx] & 0xff)) {
minimumMidpointArray = new byte[diffIdx + 1];
System.arraycopy(leftArray, leftOffset, minimumMidpointArray, 0, diffIdx);
minimumMidpointArray[diffIdx] = (byte) (diffByte + 1);
} else {
minimumMidpointArray = new byte[diffIdx + 1];
System.arraycopy(rightArray, rightOffset, minimumMidpointArray, 0, diffIdx + 1);
}
}
return minimumMidpointArray;
}
}
|
© 2015 - 2024 Weber Informatics LLC | Privacy Policy