com.bigdata.btree.raba.codec.SimpleRabaCoder Maven / Gradle / Ivy
/*
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
[email protected]
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Aug 13, 2009
*/
package com.bigdata.btree.raba.codec;
import java.io.Externalizable;
import java.io.IOException;
import java.io.ObjectInput;
import java.io.ObjectOutput;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import com.bigdata.btree.raba.IRaba;
import com.bigdata.io.AbstractFixedByteArrayBuffer;
import com.bigdata.io.DataOutputBuffer;
import com.bigdata.util.Bytes;
import com.bigdata.util.BytesUtil;
/**
* This class does not offer any compression. It merely stores the byte[][] in a
* {@link ByteBuffer} together with offset information required to extract the
* original byte[]s using a random access pattern. It supports both B+Tree keys
* and B+Tree values.
*
* @author Bryan Thompson
*/
public class SimpleRabaCoder implements IRabaCoder, Externalizable {
/**
*
*/
private static final long serialVersionUID = 3385188183979794781L;
/**
* The original version for the coded data record.
*/
private static final byte VERSION0 = 0x00;
/**
* New version for the coded data record also records the capacity of
* the original {@link IRaba}.
*/
private static final byte VERSION1 = 0x01;
private static final byte CURRENT_VERSION = VERSION1;
public static transient final SimpleRabaCoder INSTANCE = new SimpleRabaCoder();
/**
* Yes.
*/
@Override
final public boolean isKeyCoder() {
return true;
}
/**
* Yes.
*/
@Override
final public boolean isValueCoder() {
return true;
}
@Override
public boolean isDuplicateKeys() {
return false;
}
/**
* De-serialization ctor. Use {@link #INSTANCE} otherwise.
*/
public SimpleRabaCoder() {
}
@Override
public void writeExternal(ObjectOutput out) throws IOException {
// NOP
}
@Override
public void readExternal(ObjectInput in) throws IOException,
ClassNotFoundException {
// NOP
}
/** The size of the version field. */
static private final int SIZEOF_VERSION = 1;
/** The size of the bit flags. */
static private final int SIZEOF_FLAGS = 1;
/** The size of the size field. */
static private final int SIZEOF_SIZE = Bytes.SIZEOF_INT;
/**
* The size of the capacity field.
* @since #VERSION1
*/
static private final int SIZEOF_CAPACITY = Bytes.SIZEOF_INT;
/** The size of the field coding the #of elements in the offset[]. */
static private final int SIZEOF_OFFSET = Bytes.SIZEOF_INT;
/** The byte offset to the version identifier. */
static private final int O_VERSION = 0;
/** The byte offset of the bit flags. */
static private final int O_FLAGS = O_VERSION + SIZEOF_VERSION;
/** The byte offset of the field coding the #of entries in the raba. */
static private final int O_SIZE = O_FLAGS + SIZEOF_FLAGS;
/** The byte offset to the bit flags coding the nulls. */
static private final int O_NULLS(final byte version) {
if (version == VERSION0)
return O_SIZE + SIZEOF_SIZE;
return O_SIZE + SIZEOF_SIZE + SIZEOF_CAPACITY;
}
@Override
public ICodedRaba encodeLive(final IRaba raba, final DataOutputBuffer buf) {
if (raba == null)
throw new IllegalArgumentException();
if (buf == null)
throw new IllegalArgumentException();
// The #of entries.
final int size = raba.size();
// The logical capacity of the raba.
final int capacity = raba.capacity();
// iff the raba represents B+Tree keys.
final boolean isKeys = raba.isKeys();
// #of bytes for the offset[].
final int sizeOfOffsets = (size + 1) * SIZEOF_OFFSET;
// The byte offset of the origin of the coded data in the buffer.
final int O_origin = buf.pos();
// version
assert buf.pos() == O_VERSION + O_origin;
buf.putByte(CURRENT_VERSION);
// a byte containing a single bit flag.
assert buf.pos() == O_FLAGS + O_origin;
buf.putByte((byte) (isKeys ? 1 : 0));
// #of entries.
assert buf.pos() == O_SIZE + O_origin;
buf.putInt(size);
// logical capacity.
if (CURRENT_VERSION >= VERSION1)
buf.putInt(capacity);
// bit flag nulls
if (!isKeys) {
// assert buf.pos() == O_NULLS + O_origin;
for (int i = 0; i < size;) {
byte bits = 0;
for (int j = 0; j < 8 && i < size; j++, i++) {
if (raba.isNull(i)) {
// Note: bit order is per BitInputStream & BytesUtil!
bits |= 1 << (7 - j);
}
}
buf.putByte(bits);
}
}
// offset[]
// assert size > 0 && buf.pos() > O_NULLS + O_origin || size == 0 || isKeys;
final int O_offsets = buf.pos() + sizeOfOffsets - O_origin;
int lastOffset = O_offsets;
for (int i = 0; i < size; i++) {
if (raba.isNull(i)) {
buf.putInt(lastOffset);
} else {
buf.putInt(lastOffset);
lastOffset += raba.length(i);
}
}
buf.putInt(lastOffset);
// byte[]s
for (int i = 0; i < size; i++) {
if (!raba.isNull(i)) {
buf.put(raba.get(i));
}
}
assert buf.pos() == buf.limit() : buf.toString() + " : src=" + raba;
final AbstractFixedByteArrayBuffer slice = buf.slice(//
O_origin, buf.pos() - O_origin);
return new CodedRabaImpl(slice, isKeys, size, capacity, CURRENT_VERSION);
// return new CodedRabaImpl(slice);
}
@Override
public AbstractFixedByteArrayBuffer encode(final IRaba raba,
final DataOutputBuffer buf) {
/*
* There is nearly zero overhead for this code path when compared to
* encodeLive().
*/
return encodeLive(raba, buf).data();
}
@Override
public ICodedRaba decode(final AbstractFixedByteArrayBuffer data) {
return new CodedRabaImpl(data);
}
/**
* Class provides in place access to the "coded" logical byte[][].
*
* @author Bryan Thompson
* @version $Id$
*/
private static class CodedRabaImpl extends AbstractCodedRaba {
/**
* The #of entries (cached).
*/
private final int size;
/**
* The logical capacity of the {@link IRaba} (cached).
*/
private final int capacity;
/** Offset to the bit flag nulls. */
private final int o_nulls;
/**
* If the record codes B+Tree keys.
*/
private final boolean isKeys;
private final AbstractFixedByteArrayBuffer data;
/**
*
* @param data
*/
public CodedRabaImpl(final AbstractFixedByteArrayBuffer data) {
if (data == null)
throw new IllegalArgumentException();
this.data = data;
final byte version = data.getByte(O_VERSION);
switch (version) {
case VERSION0:
case VERSION1:
break;
default:
throw new RuntimeException("Unknown version: " + version);
}
// Note: Only one bit flag, so we do not need a mask.
this.isKeys = data.getByte(O_FLAGS) != 0;
this.size = data.getInt(O_SIZE);
if (version >= VERSION1)
this.capacity = data.getInt(O_SIZE + SIZEOF_SIZE);
else
this.capacity = size;
if (size < 0)
throw new IllegalArgumentException();
if (capacity < 0)
throw new IllegalArgumentException();
// #of bytes required for the nulls bit flags.
final int bitFlagByteCount = isKeys ? 0 : BytesUtil
.bitFlagByteLength(size);
o_nulls = O_NULLS(version);
// offset of the offset[].
O_offsets = o_nulls + bitFlagByteCount;
}
public CodedRabaImpl(final AbstractFixedByteArrayBuffer data,
final boolean isKeys, final int size, final int capacity,
final byte version) {
this.data = data;
this.isKeys = isKeys;
this.size = size;
this.capacity = capacity;
// #of bytes required for the nulls bit flags.
final int bitFlagByteCount = isKeys ? 0 : BytesUtil
.bitFlagByteLength(size);
o_nulls = O_NULLS(version);
// offset of the offset[].
O_offsets = o_nulls + bitFlagByteCount;
}
/**
* The offset into the buffer of the offset[]. This array is also used
* to compute the length of any given byte[] by subtracting the offset
* of the next byte[] from the offset of the desired byte[], which is
* why we write size+1
values into this array.
*/
private final int O_offsets;
@Override
final public AbstractFixedByteArrayBuffer data() {
return data;
}
@Override
public boolean isKeys() {
return isKeys;
}
@Override
final public int capacity() {
return capacity;
}
@Override
final public int size() {
return size;
}
@Override
final public boolean isEmpty() {
return size == 0;
}
@Override
final public boolean isFull() {
return true;
}
protected void rangeCheck(final int index) {
if (index < 0 || index >= size)
throw new IndexOutOfBoundsException();
}
@Override
public boolean isNull(final int index) {
if (index >= size && index < capacity) {
// everything beyond the size and up to the capacity is a null.
return true;
}
rangeCheck(index);
if (isKeys)
return false;
return data.getBit((o_nulls << 3) + index);
}
@Override
public int length(final int index) {
if (isNull(index))
throw new NullPointerException();
final int offset = data.getInt(O_offsets + index * SIZEOF_OFFSET);
final int offset2 = data.getInt(O_offsets + (index + 1)
* SIZEOF_OFFSET);
final int length = offset2 - offset;
assert length >= 0;
return length;
}
@Override
public byte[] get(final int index) {
if (isNull(index))
return null;
final int offset = data.getInt(O_offsets + index * SIZEOF_OFFSET);
final int offset2 = data.getInt(O_offsets + (index + 1)
* SIZEOF_OFFSET);
final int length = offset2 - offset;
assert length >= 0;
final byte[] a = new byte[length];
/*
* Copy the byte[] from the buffer.
*/
// *
// * Note: The buffer is duplicated first to avoid concurrent
// * modification to the buffer's internal state.
// */
// final ByteBuffer data = this.data.duplicate();
// data.limit(offset2);
// data.position(offset);
// data.get(a);
data.get(offset, a, 0/* dstoff */, length);
return a;
}
@Override
public int copy(final int index, final OutputStream os) {
if (isNull(index))
throw new NullPointerException();
final int offset = data.getInt(O_offsets + index * SIZEOF_OFFSET);
final int offset2 = data.getInt(O_offsets + (index + 1)
* SIZEOF_OFFSET);
final int length = offset2 - offset;
assert length >= 0;
try {
data.writeOn(os, offset, length);
} catch (IOException ex) {
throw new RuntimeException(ex);
}
return length;
}
/*
* Search
*/
@Override
public int search(final byte[] key) {
if (!isKeys())
throw new UnsupportedOperationException();
/*
* Note: base, mid, low, and high are offsets into the offset[]. The
* offset[] has size+1 entries, but only the first size entries
* correspond to byte[] values stored in the record. The last entry
* is just so we can figure out the length of the last byte[] stored
* in the record.
*/
final int base = 0;
final int nmem = size;
int low = 0;
int high = nmem - 1;
while (low <= high) {
final int mid = (low + high) >> 1;
final int offset = base + mid;
// offset into the buffer of the start of that byte[].
final int aoff = data
.getInt(O_offsets + offset * SIZEOF_OFFSET);
// length of that byte[].
final int alen = data.getInt(O_offsets + (offset + 1)
* SIZEOF_OFFSET)
- aoff;
// compare actual data vs probe key.
final int tmp = BytesUtil.compareBytesWithLenAndOffset(//
data.off() + aoff, alen, data.array(), //
0, key.length, key);
if (tmp < 0) {
// Actual LT probe, restrict lower bound and try again.
low = mid + 1;
} else if (tmp > 0) {
// Actual GT probe, restrict upper bound and try again.
high = mid - 1;
} else {
// Actual EQ probe. Found : return offset.
return offset;
}
}
// Not found: return insertion point.
final int offset = (base + low);
return -(offset + 1);
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy