// com.bigdata.btree.data.DefaultNodeCoder Maven / Gradle / Ivy
/*
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Aug 28, 2009
*/
package com.bigdata.btree.data;
import it.unimi.dsi.bits.Fast;
import it.unimi.dsi.io.OutputBitStream;
import java.io.Externalizable;
import java.io.IOException;
import java.io.ObjectInput;
import java.io.ObjectOutput;
import com.bigdata.btree.MutableNodeData;
import com.bigdata.btree.raba.IRaba;
import com.bigdata.btree.raba.codec.ICodedRaba;
import com.bigdata.btree.raba.codec.IRabaCoder;
import com.bigdata.io.AbstractFixedByteArrayBuffer;
import com.bigdata.io.DataOutputBuffer;
import com.bigdata.rawstore.IRawStore;
import com.bigdata.util.Bytes;
import com.bigdata.util.BytesUtil;
/**
* Default implementation for immutable {@link INodeData} records.
*
* @author Bryan Thompson
* @version $Id$
*
* @todo partly mutable coded records for {@link INodeData} are feasible. The
* only reason to expand an {@link INodeData} into a fully
* {@link MutableNodeData} is if we need to modify the keys. The rest of
* the fields could be easily patched in place if they were not coded
* (they are fixed length fields). Of course, we have a more compact
* representation when those fields ARE coded.
*/
public class DefaultNodeCoder implements IAbstractNodeDataCoder,
Externalizable {
/**
*
*/
private static final long serialVersionUID = 3998574101917337169L;
/**
* The initial version of the serialized representation of the
* {@link DefaultNodeCoder} class (versus the serializer representation of
* the node or leaf).
*/
private final static transient byte VERSION0 = 0x00;
private IRabaCoder keysCoder;
/**
 * Restores the state of this coder from its serialized form.
 * <p>
 * The stream must contain a one byte version identifier followed by the
 * serialized {@link IRabaCoder} used for the keys, which is what
 * {@link #writeExternal(ObjectOutput)} writes.
 *
 * @param in
 *            The stream to read from.
 *
 * @throws IOException
 *             if the version byte is not recognized or the stream can not
 *             be read.
 * @throws ClassNotFoundException
 *             if the class of the serialized keys coder can not be found.
 */
@Override
public void readExternal(final ObjectInput in) throws IOException,
        ClassNotFoundException {
    final byte version = in.readByte();
    switch (version) {
    case VERSION0:
        break;
    default:
        // Report the offending value so a corrupt or newer stream can be
        // diagnosed.
        throw new IOException("Unknown version: " + version);
    }
    keysCoder = (IRabaCoder) in.readObject();
}
/**
 * Serializes this coder as a single version byte ({@link #VERSION0})
 * followed by the {@link IRabaCoder} used for the keys.
 *
 * @param out
 *            The stream to write on.
 *
 * @throws IOException
 *             if the stream can not be written.
 */
@Override
public void writeExternal(final ObjectOutput out) throws IOException {
    // The version marker goes first so a reader can dispatch on it.
    final int versionMarker = VERSION0;
    out.write(versionMarker);
    out.writeObject(keysCoder);
}
/**
 * This coder does not handle leaf data records.
 *
 * @return <code>false</code>, always.
 */
@Override
public final boolean isLeafDataCoder() {
    return false;
}
/**
 * This coder handles node data records.
 *
 * @return <code>true</code>, always.
 */
@Override
public boolean isNodeDataCoder() {
    return true;
}
/**
 * Returns the default object representation followed by the configured
 * keys coder.
 */
@Override
public String toString() {
    final StringBuilder sb = new StringBuilder(super.toString());
    sb.append("{keysCoder=");
    sb.append(keysCoder);
    sb.append("}");
    return sb.toString();
}
/**
 * Zero-argument constructor used for de-serialization. The keys coder is
 * not set by this constructor; it is assigned by
 * {@link #readExternal(ObjectInput)}.
 */
public DefaultNodeCoder() {
    super();
}
/**
 * Creates a node coder which codes the keys using the given coder.
 *
 * @param keysCoder
 *            The {@link IRabaCoder} for the node's keys.
 *
 * @throws IllegalArgumentException
 *             if the argument is <code>null</code>.
 */
public DefaultNodeCoder(final IRabaCoder keysCoder) {
    if (null == keysCoder) {
        throw new IllegalArgumentException();
    }
    this.keysCoder = keysCoder;
}
/**
 * Wraps a buffer containing a coded node data record. The keys are decoded
 * using the keys coder configured on this instance.
 *
 * @param data
 *            The buffer containing the coded node data record.
 *
 * @return A read-only view of the coded record.
 */
@Override
public INodeData decode(final AbstractFixedByteArrayBuffer data) {
    final INodeData decoded = new ReadOnlyNodeData(data, keysCoder);
    return decoded;
}
/**
 * Encodes the node onto the buffer and returns a read-only view of the
 * record which was written.
 * <p>
 * The record is written starting at the current position of the buffer in
 * the following order:
 *
 * <pre>
 * type             := byte (NODE)
 * version          := short
 * flags            := short
 * nkeys            := int
 * nentries         := int (VERSION0) | long (later versions)
 * keysSize         := patched in once the keys have been coded
 * keys             := as written by the keys coder
 * childAddr[]      := (nkeys + 1) long values
 * childEntryCount[]:= see the two helper methods
 * min, max version timestamps := 2 long values (only if flagged)
 * </pre>
 *
 * @param node
 *            The node to be coded.
 * @param buf
 *            The buffer on which the record is written.
 *
 * @return A read-only view backed by a slice of the buffer covering the
 *         record.
 *
 * @throws IllegalArgumentException
 *             if either argument is <code>null</code> or if no keys coder
 *             has been set on this instance.
 * @throws UnsupportedOperationException
 *             if the record version is VERSION0 and a count exceeds
 *             {@link Integer#MAX_VALUE}.
 * @throws AssertionError
 *             if a child address is {@link IRawStore#NULL}.
 * @throws RuntimeException
 *             if a count is negative or if the per-child entry counts do
 *             not sum to the spanned tuple count.
 */
@Override
public INodeData encodeLive(final INodeData node, final DataOutputBuffer buf) {
    if (node == null)
        throw new IllegalArgumentException("node is null");
    if (keysCoder == null)
        throw new IllegalArgumentException("keysCoder is null");
    if (buf == null)
        throw new IllegalArgumentException("buf is null");

    final short version = ReadOnlyNodeData.currentVersion;

    // cache some fields.
    final int nkeys = node.getKeyCount();
    final long nentries = node.getSpannedTupleCount();

    // The byte offset of the start of the coded data in the buffer.
    final int O_origin = buf.pos();

    buf.putByte(ReadOnlyNodeData.NODE);
    buf.putShort(version);

    final boolean hasVersionTimestamps = node.hasVersionTimestamps();
    short flags = 0;
    if (hasVersionTimestamps) {
        flags |= AbstractReadOnlyNodeData.FLAG_VERSION_TIMESTAMPS;
    }
    buf.putShort(flags);

    buf.putInt(nkeys);

    if (nentries < 0) {
        /*
         * Note: This allows ZERO entries in order to support some unit
         * tests with an empty node. However, an empty node is not a legal
         * data structure in a btree. Only the root leaf may be empty.
         */
        throw new RuntimeException("spannedTupleCount=" + nentries);
    }
    if (version == ReadOnlyNodeData.VERSION0) {
        // VERSION0 stores the spanned tuple count as an int.
        if (nentries > Integer.MAX_VALUE)
            throw new UnsupportedOperationException("spannedTupleCount="
                    + nentries + " does not fit in an int");
        buf.putInt((int) nentries);
    } else {
        buf.putLong(nentries);
    }

    // The offset at which the byte length of the keys will be recorded.
    final int O_keysSize = buf.pos();
    buf.skip(ReadOnlyNodeData.SIZEOF_KEYS_SIZE);

    // Write the encoded keys on the buffer.
    final ICodedRaba encodedKeys = keysCoder
            .encodeLive(node.getKeys(), buf);

    // Patch the byte length of the coded keys on the buffer.
    buf.putInt(O_keysSize, encodedKeys.data().len());

    // childAddr[] : @todo code childAddr[] (needs IAddressManager if store aware coding).
    for (int i = 0; i <= nkeys; i++) {
        /*
         * See #855 (Child identity is not persistent).
         */
        final long childAddr = node.getChildAddr(i);
        if (childAddr == IRawStore.NULL)
            throw new AssertionError("Child is not persistent: index=" + i
                    + " out of " + nkeys + " entries, " + node.toString());
        buf.putLong(childAddr);
    }

    // childEntryCount[]
    if (version == ReadOnlyNodeData.VERSION0) {
        putChildEntryCountsV0(node, nkeys, nentries, buf);
    } else {
        putChildEntryCountsV1(node, nkeys, nentries, buf);
    }

    if (hasVersionTimestamps) {
        buf.putLong(node.getMinimumVersionTimestamp());
        buf.putLong(node.getMaximumVersionTimestamp());
    }

    // Slice onto the coded data record.
    final AbstractFixedByteArrayBuffer slice = buf.slice(//
            O_origin, buf.pos() - O_origin);

    // Read-only coded IDataRecord.
    return new ReadOnlyNodeData(slice, encodedKeys);
}

/**
 * Writes the per-child entry counts as (nkeys + 1) int values, which is the
 * VERSION0 representation, and verifies that they sum to the spanned tuple
 * count.
 */
private static void putChildEntryCountsV0(final INodeData node,
        final int nkeys, final long nentries, final DataOutputBuffer buf) {
    long sum = 0;
    for (int i = 0; i <= nkeys; i++) {
        final long nchildren = node.getChildEntryCount(i);
        if (nchildren < 0)
            throw new AssertionError("childEntryCount=" + nchildren
                    + ", index=" + i);
        if (nchildren > Integer.MAX_VALUE)
            throw new UnsupportedOperationException("childEntryCount="
                    + nchildren + " does not fit in an int, index=" + i);
        buf.putInt((int) nchildren);
        sum += nchildren;
    }
    if (sum != nentries)
        throw new RuntimeException("spannedTupleCount=" + nentries
                + ", but sum over children=" + sum);
}

/**
 * Writes the per-child entry counts as deltas from their minimum using the
 * minimum #of bits required to code the largest delta, and verifies that
 * the counts sum to the spanned tuple count.
 * <p>
 * The encoding takes:
 *
 * <pre>
 * nbits := 1 byte
 * min   := 8 bytes
 * array := BytesUtil.bitFlagByteLength((nkeys + 1) * nbits)
 * </pre>
 *
 * Note: If min==max then ZERO bits are used per child and no array is
 * written. The array is written as whole bytes so the next data begins on a
 * byte boundary.
 */
private static void putChildEntryCountsV1(final INodeData node,
        final int nkeys, final long nentries, final DataOutputBuffer buf) {
    long min = Long.MAX_VALUE, max = Long.MIN_VALUE;
    long sum = 0;
    for (int i = 0; i <= nkeys; i++) {
        final long nchildren = node.getChildEntryCount(i);
        sum += nchildren;
        if (nchildren < 0) {
            /*
             * Note: ZERO is permitted for a test case, but is not legal
             * in live data.
             */
            throw new RuntimeException("childEntryCount=" + nchildren
                    + ", index=" + i);
        }
        if (min > nchildren)
            min = nchildren;
        if (max < nchildren)
            max = nchildren;
    }
    if (sum != nentries)
        throw new RuntimeException("spannedTupleCount=" + nentries
                + ", but sum over children=" + sum);
    if (sum == 0)
        min = max = 0;

    final long delta = max - min;
    assert delta >= 0;

    /*
     * The #of bits needed to code the largest delta. This is ZERO when
     * min==max (which is why the array is only written when nbits GT ZERO)
     * and is never negative since delta is non-negative.
     */
    final byte nbits = (byte) (Fast.mostSignificantBit(delta) + 1);

    // one byte.
    buf.putByte(nbits);

    // int64
    buf.putLong(min);

    if (nbits > 0) {
        /*
         * Note: We only write the deltas if (min!=max). When min==max, the
         * deltas are coded in zero bits, so this would be a NOP anyway.
         */
        final int byteLength = BytesUtil.bitFlagByteLength((nkeys + 1)
                * nbits);
        final byte[] a = new byte[byteLength];
        final OutputBitStream obs = new OutputBitStream(a);
        try {
            // array of [nbits] length fields.
            for (int i = 0; i <= nkeys; i++) {
                final long d = node.getChildEntryCount(i) - min;
                assert d >= 0;
                obs.writeLong(d, nbits);
            }
            obs.flush();
            // copy onto the buffer.
            buf.put(a);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        // Note: close is not necessary if flushed and backed by byte[].
    }
}
/**
 * Encodes the node onto the buffer and returns the coded record.
 * <p>
 * This delegates to {@link #encodeLive(INodeData, DataOutputBuffer)} and
 * returns the data backing the resulting coded node.
 *
 * @param node
 *            The node to be coded.
 * @param buf
 *            The buffer on which the record is written.
 *
 * @return The coded record.
 */
@Override
public AbstractFixedByteArrayBuffer encode(final INodeData node,
        final DataOutputBuffer buf) {
    final INodeData coded = encodeLive(node, buf);
    return coded.data();
}
/**
 * A read-only view of the coded data for a B+Tree node.
 * <p>
 * Note: The leading byte of the record is a type code. The constructor
 * accepts only the NODE type code and rejects the LEAF and LINKED_LEAF
 * type codes, so the same leading byte distinguishes node records from
 * leaf records.
 *
 * @author Bryan Thompson
 * @version $Id$
 */
static private class ReadOnlyNodeData extends AbstractReadOnlyNodeData
        implements INodeData {

    /** The backing buffer */
    private final AbstractFixedByteArrayBuffer b;

    /** The record serialization version. */
    private final short version;

    // fields which are cached by the ctor.
    private final short flags;
    private final int nkeys;
    private final long nentries;

    /**
     * Offset of the encoded keys in the buffer.
     */
    private final int O_keys;

    /**
     * The coded keys.
     */
    private final ICodedRaba keys;

    /**
     * Offset of the encoded childAddr[] in the buffer.
     */
    private final int O_childAddr;

    /**
     * Offset of the encoded childEntryCount[] in the buffer.
     *
     * TODO Compute at runtime to save space as
     * <code>O_childAddr + (nkeys + 1) * SIZEOF_ADDR</code>?
     */
    private final int O_childEntryCount;

    /**
     * The #of bits in the delta encoding of the childEntryCount[]. This is
     * <code>-1</code> for VERSION0 records, which do not use the delta
     * encoding.
     */
    private final byte childEntryCountBits;

    /**
     * The minimum across the childEntryCount[]. This is <code>-1</code> for
     * VERSION0 records, which do not store the minimum.
     */
    private final long minChildEntryCount;

    @Override
    final public AbstractFixedByteArrayBuffer data() {
        return b;
    }

    /**
     * Constructor used when the caller is encoding the {@link INodeData}.
     *
     * @param buf
     *            The buffer containing the data for the node.
     * @param keys
     *            The coded keys.
     *
     * @throws IllegalArgumentException
     *             if either argument is <code>null</code>.
     */
    public ReadOnlyNodeData(final AbstractFixedByteArrayBuffer buf,
            final ICodedRaba keys) {
        this(buf, requireArgument(keys, "keys"), null/* keysCoder */);
    }

    /**
     * Decode in place (wraps a buffer containing an encoded node data
     * record).
     *
     * @param buf
     *            The buffer containing the data for the node.
     * @param keysCoder
     *            The coder used to decode the keys.
     *
     * @throws IllegalArgumentException
     *             if either argument is <code>null</code>.
     */
    public ReadOnlyNodeData(final AbstractFixedByteArrayBuffer buf,
            final IRabaCoder keysCoder) {
        this(buf, null/* codedKeys */, requireArgument(keysCoder,
                "keysCoder"));
    }

    /**
     * Returns the argument unless it is <code>null</code>. This lets the
     * public constructors validate an argument before delegating.
     */
    private static <T> T requireArgument(final T arg, final String name) {
        if (arg == null)
            throw new IllegalArgumentException(name + " is null");
        return arg;
    }

    /**
     * Shared constructor which parses the record header. Exactly one of
     * <i>codedKeys</i> and <i>keysCoder</i> is non-<code>null</code>: the
     * coded keys are used as given when present and otherwise are decoded
     * from the buffer using the coder.
     */
    private ReadOnlyNodeData(final AbstractFixedByteArrayBuffer buf,
            final ICodedRaba codedKeys, final IRabaCoder keysCoder) {
        if (buf == null)
            throw new IllegalArgumentException("buf is null");

        int pos = O_TYPE;
        final byte type = buf.getByte(pos);
        pos += SIZEOF_TYPE;
        switch (type) {
        case NODE:
            break;
        case LEAF:
            throw new AssertionError();
        case LINKED_LEAF:
            throw new AssertionError();
        default:
            throw new AssertionError("type=" + type);
        }

        version = buf.getShort(pos);
        pos += SIZEOF_VERSION;
        switch (version) {
        case VERSION0:
        case VERSION1:
            break;
        default:
            throw new AssertionError("version=" + version);
        }

        // flags
        flags = buf.getShort(pos);
        pos += SIZEOF_FLAGS;

        this.nkeys = buf.getInt(pos);
        pos += SIZEOF_NKEYS;

        // The spanned tuple count is an int in VERSION0 and a long after.
        if (version == ReadOnlyNodeData.VERSION0) {
            this.nentries = buf.getInt(pos);
            pos += Bytes.SIZEOF_INT;
        } else {
            this.nentries = buf.getLong(pos);
            pos += Bytes.SIZEOF_LONG;
        }
        if (nentries < 0) {
            /*
             * Note: ZERO (0) is permitted for a test case but is not legal
             * in live data.
             */
            throw new RuntimeException();
        }

        final int keysSize = buf.getInt(pos);
        pos += SIZEOF_KEYS_SIZE;

        O_keys = pos;
        if (codedKeys != null) {
            this.keys = codedKeys;
        } else {
            this.keys = keysCoder.decode(buf.slice(O_keys, keysSize));
        }
        pos += keysSize;

        if (this.keys.size() != nkeys) // sanity check.
            throw new RuntimeException("nkeys=" + nkeys + ", keys.size="
                    + this.keys.size());

        O_childAddr = pos;
        O_childEntryCount = O_childAddr + (nkeys + 1) * SIZEOF_ADDR;

        if (version >= ReadOnlyNodeData.VERSION1) {
            // [nbits] is one byte and is followed by [min], an int64.
            childEntryCountBits = buf.getByte(O_childEntryCount);
            minChildEntryCount = buf.getLong(O_childEntryCount + 1);
        } else {
            /*
             * Not used in this version. VERSION0 stores plain int counts
             * here, so there is no [nbits] or [min] field to read and
             * reading 8 bytes at this offset could run past the end of a
             * short record.
             */
            childEntryCountBits = -1;
            minChildEntryCount = -1L;
        }

        // save reference to buffer
        this.b = buf;
    }

    /**
     * The offset into the buffer of the minimum version timestamp, which is
     * an int64 field. The maximum version timestamp is the next field. This
     * offset is computed dynamically to keep down the size of the node
     * object in memory.
     */
    private int getVersionTimestampOffset() {
        if (version == ReadOnlyNodeData.VERSION0) {
            return O_childEntryCount + ((nkeys + 1) * Bytes.SIZEOF_INT);
        } else {
            /*
             * Compute the offset to the version timestamps based on the
             * #of bits required to encode each entry in the child entry
             * count array.
             *
             * nbits := 1 byte
             * min   := 8 bytes (Long)
             * array := BytesUtil.bitFlagByteLength((nkeys + 1)* nbits)
             */
            return O_childEntryCount//
                    + 1 // one byte whose value is [nbits]
                    + Bytes.SIZEOF_LONG // min
                    + BytesUtil.bitFlagByteLength((nkeys + 1)
                            * childEntryCountBits);
        }
    }

    @Override
    final public boolean hasVersionTimestamps() {
        return ((flags & FLAG_VERSION_TIMESTAMPS) != 0);
    }

    @Override
    final public long getMinimumVersionTimestamp() {
        if (!hasVersionTimestamps())
            throw new UnsupportedOperationException();
        final int off = getVersionTimestampOffset();
        // at the offset.
        return b.getLong(off);
    }

    @Override
    final public long getMaximumVersionTimestamp() {
        if (!hasVersionTimestamps())
            throw new UnsupportedOperationException();
        final int off = getVersionTimestampOffset() + SIZEOF_TIMESTAMP;
        // one long value beyond the offset.
        return b.getLong(off);
    }

    /**
     * Always returns <code>false</code>.
     */
    @Override
    final public boolean isLeaf() {
        return false;
    }

    /**
     * Yes.
     */
    @Override
    final public boolean isReadOnly() {
        return true;
    }

    /**
     * Yes.
     */
    @Override
    final public boolean isCoded() {
        return true;
    }

    /**
     * {@inheritDoc}. This field is cached.
     */
    @Override
    final public int getKeyCount() {
        return nkeys;
    }

    /**
     * {@inheritDoc}. This is computed from the cached key count.
     */
    @Override
    final public int getChildCount() {
        return nkeys + 1;
    }

    /**
     * {@inheritDoc}. This field is cached.
     */
    @Override
    final public long getSpannedTupleCount() {
        return nentries;
    }

    /**
     * Bounds check. There are <code>nkeys + 1</code> children, so the
     * valid child indices are <code>[0:nkeys]</code>.
     *
     * @return <code>true</code> (so the method may be used in an
     *         <code>assert</code>).
     *
     * @throws IndexOutOfBoundsException
     *             if index is LT ZERO (0)
     * @throws IndexOutOfBoundsException
     *             if index is GT nkeys
     */
    protected boolean assertChildIndex(final int index) {
        if (index < 0 || index > nkeys)
            throw new IndexOutOfBoundsException("index=" + index
                    + ", nkeys=" + nkeys);
        return true;
    }

    @Override
    final public long getChildAddr(final int index) {
        assert assertChildIndex(index);
        return b.getLong(O_childAddr + index * SIZEOF_ADDR);
    }

    @Override
    final public long getChildEntryCount(final int index) {
        assert assertChildIndex(index);
        if (version == ReadOnlyNodeData.VERSION0) {
            return b.getInt(O_childEntryCount + index * Bytes.SIZEOF_INT);
        } else {
            if (childEntryCountBits == 0) {
                // min==max: no per-child deltas were written.
                return minChildEntryCount;
            }
            // Note: O_childEntryCount is [nbits], which is one byte. It is
            // followed by [min] and then by the packed deltas.
            final long bitpos = ((O_childEntryCount + 1 + Bytes.SIZEOF_LONG) << 3)
                    + ((long) index * childEntryCountBits);
            // Adjust for the offset of the slice into the backing array.
            final long bitIndex = (b.off() << 3) + bitpos;
            final long deltat = BytesUtil.getBits64(b.array(),
                    (int) bitIndex, childEntryCountBits);
            return minChildEntryCount + deltat;
        }
    }

    @Override
    final public IRaba getKeys() {
        return keys;
    }

    @Override
    public String toString() {
        final StringBuilder sb = new StringBuilder();
        sb.append(getClass().getName() + "{");
        DefaultNodeCoder.toString(this, sb);
        sb.append("}");
        return sb.toString();
    }
}
/**
 * Appends a representation of the {@link INodeData} onto the caller's
 * {@link StringBuilder}: the #of children, the spanned tuple count, the
 * keys, the child addresses, the per-child entry counts, and (when the node
 * reports having them) the min/max version timestamps.
 *
 * @param node
 *            A node data record.
 * @param sb
 *            The representation will be written onto this object.
 *
 * @return The sb parameter.
 */
static public StringBuilder toString(final INodeData node,
        final StringBuilder sb) {

    final int nchildren = node.getChildCount();
    sb.append(", nchildren=").append(nchildren);

    final long spannedTupleCount = node.getSpannedTupleCount();
    sb.append(", spannedTupleCount=").append(spannedTupleCount);

    final Object keys = node.getKeys();
    sb.append(",\nkeys=").append(keys);

    // The child addresses.
    sb.append(",\nchildAddr=[");
    for (int child = 0; child < nchildren; child++) {
        if (child != 0) {
            sb.append(", ");
        }
        sb.append(node.getChildAddr(child));
    }
    sb.append("]");

    // The #of tuples spanned by each child.
    sb.append(",\nchildEntryCount=[");
    for (int child = 0; child < nchildren; child++) {
        if (child != 0) {
            sb.append(", ");
        }
        sb.append(node.getChildEntryCount(child));
    }
    sb.append("]");

    if (node.hasVersionTimestamps()) {
        // Read both values before appending anything.
        final long minTimestamp = node.getMinimumVersionTimestamp();
        final long maxTimestamp = node.getMaximumVersionTimestamp();
        sb.append(",\nversionTimestamps={min=").append(minTimestamp)
                .append(",max=").append(maxTimestamp).append("}");
    }

    return sb;
}
}
/*
* @todo old code from NodeSerializer.
*/
// private void putChildAddresses(IAddressManager addressManager,
// DataOutputBuffer os, INodeData node) throws IOException {
//
// final int nchildren = node.getChildCount();
//
// for (int i = 0; i < nchildren; i++) {
//
// final long addr = node.getChildAddr(i);
//
// /*
// * Children MUST have assigned persistent identity.
// */
// if (addr == 0L) {
//
// throw new RuntimeException("Child is not persistent: index="
// + i);
//
// }
//
// os.writeLong(addr);
//
// }
//
// }
//
// private void getChildAddresses(IAddressManager addressManager,
// DataInput is, long[] childAddr, int nchildren) throws IOException {
//
// for (int i = 0; i < nchildren; i++) {
//
// final long addr = is.readLong();
//
// if (addr == 0L) {
//
// throw new RuntimeException(
// "Child does not have persistent address: index=" + i);
//
// }
//
// childAddr[i] = addr;
//
// }
//
// }
//
// /**
// * Write out a packed array of the #of entries spanned by each child of some
// * node.
// *
// * @param os
// * The output stream.
// * @param childEntryCounts
// * The #of entries spanned by each direct child.
// * @param nchildren
// * The #of elements of that array that are defined.
// *
// * @throws IOException
// *
// * @todo customizable serializer interface configured in
// * {@link IndexMetadata}.
// */
// static protected void putChildEntryCounts(final DataOutput os,
// final INodeData node) throws IOException {
//
// final int nchildren = node.getChildCount();
//
// for (int i = 0; i < nchildren; i++) {
//
// final long nentries = node.getChildEntryCount(i);
//
// /*
// * Children MUST span some entries.
// */
// if (nentries == 0L) {
//
// throw new RuntimeException(
// "Child does not span entries: index=" + i);
//
// }
//
// LongPacker.packLong(os, nentries);
//
// }
//
// }
//
// /**
// * Read in a packed array of the #of entries spanned by each child of some
// * node.
// *
// * @param is
// * @param childEntryCounts
// * The #of entries spanned by each direct child.
// * @param nchildren
// * The #of elements of that array that are defined.
// * @throws IOException
// *
// * @todo customizable serializer interface configured in
// * {@link IndexMetadata}.
// */
// static protected void getChildEntryCounts(DataInput is,
// int[] childEntryCounts, int nchildren) throws IOException {
//
// for (int i = 0; i < nchildren; i++) {
//
// final int nentries = (int) LongPacker.unpackLong(is);
//
// if (nentries == 0L) {
//
// throw new RuntimeException(
// "Child does not span entries: index=" + i);
//
// }
//
// childEntryCounts[i] = nentries;
//
// }
//
// }