// org.hbase.async.KeyValue (Maven / Gradle / Ivy artifact: asynchbase)
// Source listing of the KeyValue class from the asynchbase artifact.
/*
* Copyright (C) 2010-2012 The Async HBase Authors. All rights reserved.
* This file is part of Async HBase.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the StumbleUpon nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
package org.hbase.async;
import java.util.Arrays;
import org.jboss.netty.buffer.ChannelBuffer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.hbase.async.generated.CellPB;
/**
* A "cell" in an HBase table.
*
* This represents one unit of HBase data, one "record".
*
*
* <h1>A note on {@code byte} arrays</h1>
* This class will never copy any {@code byte[]} that's given to it, neither
* will it create a copy before returning one to you.
* Changing a byte array you get from or pass to this class will have
* unpredictable consequences. In particular, multiple
* {@link KeyValue} instances may share the same byte arrays, so changing
* one instance may also unexpectedly affect others.
*/
public final class KeyValue implements Comparable<KeyValue> {

  /**
   * Timestamp value to let the server set the timestamp at processing time.
   * When this value is used as a timestamp on a {@code KeyValue}, the server
   * will substitute a real timestamp at the time it processes it.  HBase uses
   * current UNIX time in milliseconds.
   */
  public static final long TIMESTAMP_NOW = Long.MAX_VALUE;

  /**
   * Number of bytes in the serialized "key part" of a KeyValue that are not
   * row key, family or qualifier bytes: 2 (row key length) + 1 (family
   * length) + 8 (timestamp) + 1 (type).
   */
  private static final int KEY_OVERHEAD = 2 + 1 + 8 + 1;

  private final byte[] key;        // Max length: Short.MAX_VALUE = 32767
  private final byte[] family;     // Max length: Byte.MAX_VALUE = 127
  private final byte[] qualifier;
  private final byte[] value;
  private final long timestamp;
  // The type of the KeyValue is not stored in instances; it is only supplied
  // by the caller at serialization time (see serialize()).

  // Note: type can be one of:
  //   -  4  0b00000100  Put
  static final byte PUT = 4;
  //   -  8  0b00001000  Delete (delete the specified version of a cell)
  static final byte DELETE = 8;
  //   - 12  0b00001100  DeleteColumn (delete all previous versions of a cell)
  static final byte DELETE_COLUMN = 12;
  //   - 14  0b00001110  DeleteFamily (delete all cells within a family)
  static final byte DELETE_FAMILY = 14;
  // (Not sure how those have been assigned...  Randomly maybe?)

  /**
   * Constructor.
   * @param key The row key.  Length must fit in 16 bits.
   * @param family The column family.  Length must fit in 8 bits.
   * @param qualifier The column qualifier.
   * @param timestamp Timestamp on the value.  This timestamp can be set to
   * guarantee ordering of values or operations.  It is strongly advised to
   * use a UNIX timestamp in milliseconds, e.g. from a source such as
   * {@link System#currentTimeMillis}.  This value must not be negative.
   * @param value The value, the contents of the cell.
   * @throws IllegalArgumentException if any argument is invalid (e.g. array
   * size is too long) or if the timestamp is negative.
   * @throws NullPointerException if any of the arrays is {@code null}.
   * @since 1.2
   */
  public KeyValue(final byte[] key,
                  final byte[] family, final byte[] qualifier,
                  final long timestamp,
                  final byte[] value) {
    checkKey(key);
    checkFamily(family);
    checkQualifier(qualifier);
    checkTimestamp(timestamp);
    checkValue(value);
    // The arrays are deliberately stored without copying (see class docs).
    this.key = key;
    this.family = family;
    this.qualifier = qualifier;
    this.value = value;
    this.timestamp = timestamp;
  }

  /**
   * Constructor.
   * <p>
   * This {@code KeyValue} will be timestamped by the server at the time
   * the server processes it.
   * @param key The row key.  Length must fit in 16 bits.
   * @param family The column family.  Length must fit in 8 bits.
   * @param qualifier The column qualifier.
   * @param value The value, the contents of the cell.
   * @throws IllegalArgumentException if any argument is invalid (e.g. array
   * size is too long).
   * @see #TIMESTAMP_NOW
   */
  public KeyValue(final byte[] key,
                  final byte[] family, final byte[] qualifier,
                  final byte[] value) {
    this(key, family, qualifier, TIMESTAMP_NOW, value);
  }

  /** Returns the row key (the internal array, not a copy). */
  public byte[] key() {
    return key;
  }

  /** Returns the column family (the internal array, not a copy). */
  public byte[] family() {
    return family;
  }

  /** Returns the column qualifier (the internal array, not a copy). */
  public byte[] qualifier() {
    return qualifier;
  }

  /**
   * Returns the timestamp stored in this {@code KeyValue}.
   * @see #TIMESTAMP_NOW
   */
  public long timestamp() {
    return timestamp;
  }

  /** Returns the value, the contents of the cell (not a copy). */
  public byte[] value() {
    return value;
  }

  /**
   * Orders by row key, then family, then qualifier, then timestamp
   * (ascending), and finally by value.
   */
  @Override
  public int compareTo(final KeyValue other) {
    int d = Bytes.memcmp(key, other.key);
    if (d != 0) {
      return d;
    }
    d = Bytes.memcmp(family, other.family);
    if (d != 0) {
      return d;
    }
    d = Bytes.memcmp(qualifier, other.qualifier);
    if (d != 0) {
      return d;
    }
    // Both timestamps were validated as non-negative by the constructor, so
    // this subtraction cannot overflow.
    d = Long.signum(timestamp - other.timestamp);
    if (d != 0) {
      return d;
    }
    return Bytes.memcmp(value, other.value);
  }

  /** Two instances are equal when {@link #compareTo} reports no difference. */
  @Override
  public boolean equals(final Object other) {
    if (this == other) {
      return true;
    }
    // instanceof is already false for null, no separate null check needed.
    if (!(other instanceof KeyValue)) {
      return false;
    }
    return compareTo((KeyValue) other) == 0;
  }

  /** Hash over every field that takes part in {@link #equals}. */
  @Override
  public int hashCode() {
    return Arrays.hashCode(key)
      ^ Arrays.hashCode(family)
      ^ Arrays.hashCode(qualifier)
      ^ Arrays.hashCode(value)
      ^ (int) (timestamp ^ (timestamp >>> 32));
  }

  /** Returns a human readable representation of all the fields. */
  @Override
  public String toString() {
    final StringBuilder buf = new StringBuilder(84  // Boilerplate + timestamp
      // the row key is likely to contain non-ascii characters, so
      // let's multiply its length by 2 to avoid re-allocations.
      + key.length * 2 + family.length + qualifier.length + value.length);
    buf.append("KeyValue(key=");
    Bytes.pretty(buf, key);
    buf.append(", family=");
    Bytes.pretty(buf, family);
    buf.append(", qualifier=");
    Bytes.pretty(buf, qualifier);
    buf.append(", value=");
    Bytes.pretty(buf, value);
    buf.append(", timestamp=").append(timestamp);
    buf.append(')');
    return buf.toString();
  }

  /**
   * De-serializes {@link KeyValue} from a buffer (HBase 0.94 and before).
   * @param buf The buffer to de-serialize from.
   * @param prev Another {@link KeyValue} previously de-serialized from the
   * same buffer.  Can be {@code null}.  The idea here is that KeyValues
   * often come in a sorted batch, and often share a number of byte arrays
   * (e.g. they all have the same row key and/or same family...).  When
   * you specify another KeyValue, its byte arrays will be re-used in order
   * to avoid having too much duplicate data in memory.  This costs a little
   * bit of CPU time to compare the arrays but saves memory (which in turns
   * saves CPU time later).
   * @return a new instance (guaranteed non-{@code null}).
   * @throws IllegalArgumentException if the buffer seems to contain a
   * malformed {@link KeyValue}.
   */
  public static KeyValue fromBuffer(final ChannelBuffer buf,
                                    final KeyValue prev) {
    // Length of the whole "key part": row key, family, qualifier, timestamp
    // and type, plus their length prefixes.
    final int rowkey_length = buf.readInt();
    HBaseRpc.checkNonEmptyArrayLength(buf, rowkey_length);
    final int value_length = buf.readInt();
    HBaseRpc.checkArrayLength(buf, value_length);
    final short key_length = buf.readShort();
    // Validate the length we are about to allocate (the row key), not the
    // value length again: a negative short must not reach `new byte[]'.
    HBaseRpc.checkArrayLength(buf, key_length);
    final byte[] key = new byte[key_length];
    buf.readBytes(key);
    final byte family_length = buf.readByte();
    if (family_length < 0) {
      invalid("negative family_length=" + family_length + " in " + buf + '='
              + Bytes.pretty(buf));
    }
    if (key_length + family_length + KEY_OVERHEAD > rowkey_length) {
      invalid("rowkey_length="
              + rowkey_length + " doesn't match key_length + family_length ("
              + key_length + " + " + family_length + " +12) in " + buf + '='
              + Bytes.pretty(buf));
    }
    final byte[] family = new byte[family_length];
    buf.readBytes(family);
    // Whatever remains of the key part once everything else is accounted
    // for is the qualifier.  The check above guarantees this is >= 0.
    final int qual_length = (rowkey_length - key_length - family_length
                             - KEY_OVERHEAD);
    HBaseRpc.checkArrayLength(buf, qual_length);
    final byte[] qualifier;
    if (qual_length > 0) {
      qualifier = new byte[qual_length];
      buf.readBytes(qualifier);
    } else {
      qualifier = HBaseClient.EMPTY_ARRAY;
    }
    final long timestamp = buf.readLong();
    buf.readByte();  // The KV type: not kept, but must be consumed.
    final byte[] value;
    if (value_length > 0) {
      value = new byte[value_length];
      buf.readBytes(value);
    } else {
      value = HBaseClient.EMPTY_ARRAY;
    }
    if (prev == null) {
      return new KeyValue(key, family, qualifier, timestamp, value);
    } else {
      return new KeyValue(Bytes.deDup(prev.key, key),
                          Bytes.deDup(prev.family, family),
                          Bytes.deDup(prev.qualifier, qualifier),
                          timestamp, value);
    }
  }

  /** Reports malformed serialized data by throwing. */
  private static void invalid(final String errmsg) {
    throw new IllegalArgumentException(errmsg);
  }

  /**
   * Transforms a protobuf Cell message into a KeyValue (HBase 0.95+).
   * @param cell The protobuf cell to convert.
   * @param prev Another {@link KeyValue} previously de-serialized from the
   * same response.  Can be {@code null}.  The idea here is that KeyValues
   * often come in a sorted batch, and often share a number of byte arrays
   * (e.g. they all have the same row key and/or same family...).  When
   * you specify another KeyValue, its byte arrays will be re-used in order
   * to avoid having too much duplicate data in memory.  This costs a little
   * bit of CPU time to compare the arrays but saves memory (which in turns
   * saves CPU time later).
   * @return a new instance (guaranteed non-{@code null}).
   */
  static KeyValue fromCell(final CellPB.Cell cell, final KeyValue prev) {
    final byte[] key = Bytes.get(cell.getRow());
    final byte[] family = Bytes.get(cell.getFamily());
    final byte[] qualifier = Bytes.get(cell.getQualifier());
    final long timestamp = cell.getTimestamp();
    final byte[] value = Bytes.get(cell.getValue());
    if (prev == null) {
      return new KeyValue(key, family, qualifier, timestamp, value);
    } else {
      return new KeyValue(Bytes.deDup(prev.key, key),
                          Bytes.deDup(prev.family, family),
                          Bytes.deDup(prev.qualifier, qualifier),
                          timestamp, value);
    }
  }

  // ------------------------------------------------------------ //
  // Misc helper functions to validate some aspects of KeyValues. //
  // ------------------------------------------------------------ //

  // OK this isn't technically part of a KeyValue but since all the similar
  // functions are here, let's keep things together in one place.
  /**
   * Validates a table name.
   * @throws IllegalArgumentException if the table name is too big or
   * empty.
   * @throws NullPointerException if the table name is {@code null}.
   */
  static void checkTable(final byte[] table) {
    if (table.length > Byte.MAX_VALUE) {
      throw new IllegalArgumentException("Table name too long: "
        + table.length + " bytes long " + Bytes.pretty(table));
    } else if (table.length == 0) {
      throw new IllegalArgumentException("empty table name");
    }
  }

  /**
   * Validates a row key.
   * @throws IllegalArgumentException if the key is too big.
   * @throws NullPointerException if the key is {@code null}.
   */
  static void checkKey(final byte[] key) {
    if (key.length > Short.MAX_VALUE) {
      throw new IllegalArgumentException("row key too long: "
        + key.length + " bytes long " + Bytes.pretty(key));
    }
  }

  /**
   * Validates a column family.
   * @throws IllegalArgumentException if the family name is too big.
   * @throws NullPointerException if the family is {@code null}.
   */
  static void checkFamily(final byte[] family) {
    if (family.length > Byte.MAX_VALUE) {
      throw new IllegalArgumentException("column family too long: "
        + family.length + " bytes long " + Bytes.pretty(family));
    }
  }

  /**
   * Validates a column qualifier (delegates to
   * {@code HBaseRpc.checkArrayLength}).
   * @throws IllegalArgumentException if the qualifier name is too big.
   * @throws NullPointerException if the qualifier is {@code null}.
   */
  static void checkQualifier(final byte[] qualifier) {
    HBaseRpc.checkArrayLength(qualifier);
  }

  /**
   * Validates a timestamp.  Zero is accepted.
   * @throws IllegalArgumentException if the timestamp is negative.
   */
  static void checkTimestamp(final long timestamp) {
    if (timestamp < 0) {
      throw new IllegalArgumentException("Negative timestamp: " + timestamp);
    }
  }

  /**
   * Validates a value, the contents of an HBase cell (delegates to
   * {@code HBaseRpc.checkArrayLength}).
   * @throws IllegalArgumentException if the value is too big.
   * @throws NullPointerException if the value is {@code null}.
   */
  static void checkValue(final byte[] value) {
    HBaseRpc.checkArrayLength(value);
  }

  // ---------------------- //
  // Serialization helpers. //
  // ---------------------- //

  /**
   * Serializes this KeyValue.
   * @param buf The buffer into which to write the serialized form.
   * @param type What kind of KV (e.g. {@link #PUT} or {@link #DELETE_FAMILY}).
   */
  void serialize(final ChannelBuffer buf, final byte type) {
    serialize(buf, type, timestamp, key, family, qualifier, value);
  }

  /**
   * Returns the serialized length of this KeyValue, in bytes.
   */
  int predictSerializedSize() {
    return predictSerializedSize(key, family, qualifier, value);
  }

  /**
   * Returns the serialized length of a KeyValue, in bytes.
   * @param key The row key.
   * @param family The column family.
   * @param qualifier The column qualifier.
   * @param value The value; {@code null} counts as zero bytes.
   */
  static int predictSerializedSize(final byte[] key,
                                   final byte[] family,
                                   final byte[] qualifier,
                                   final byte[] value) {
    return
      + 4                 // int: Total length of the whole KeyValue.
      + 4                 // int: Total length of the key part of the KeyValue.
      + 4                 // int: Total length of the value part of the KeyValue.
      + 2                 // short: Row key length.
      + key.length        // The row key.
      + 1                 // byte: Family length.
      + family.length     // The family.
      + qualifier.length  // The qualifier.
      + 8                 // long: The timestamp.
      + 1                 // byte: The type of KeyValue.
      + (value == null ? 0 : value.length);
  }

  /**
   * Serializes a KeyValue.
   * @param buf The buffer into which to write the serialized form.
   * @param type What kind of KV (e.g. {@link #PUT} or {@link #DELETE_FAMILY}).
   * @param timestamp The timestamp to put on the KV.
   * @param key The row key.
   * @param family The column family.
   * @param qualifier The column qualifier.
   * @param value The value; {@code null} is written as a zero-length value.
   */
  static void serialize(final ChannelBuffer buf,
                        final byte type,
                        final long timestamp,
                        final byte[] key,
                        final byte[] family,
                        final byte[] qualifier,
                        final byte[] value) {
    final int val_length = value == null ? 0 : value.length;
    final int key_length = 2 + key.length + 1 + family.length
      + qualifier.length + 8 + 1;
    // Write the length of the whole KeyValue again (this is so useless...).
    buf.writeInt(4 + 4 + key_length + val_length);  // Total length.
    buf.writeInt(key_length);                       // Key length.
    buf.writeInt(val_length);                       // Value length.
    // Then the whole key.
    buf.writeShort(key.length);           // Row length.
    buf.writeBytes(key);                  // The row key (again!).
    buf.writeByte((byte) family.length);  // Family length.
    buf.writeBytes(family);               // Write the family (again!).
    buf.writeBytes(qualifier);            // The qualifier.
    buf.writeLong(timestamp);             // The timestamp (again!).
    buf.writeByte(type);                  // Type of edit
    if (value != null) {
      buf.writeBytes(value);              // Finally, the value (if any).
    }
  }

}