All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.bigdata.btree.data.DefaultNodeCoder Maven / Gradle / Ivy

/*

Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     licenses@blazegraph.com

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
/*
 * Created on Aug 28, 2009
 */

package com.bigdata.btree.data;

import it.unimi.dsi.bits.Fast;
import it.unimi.dsi.io.OutputBitStream;

import java.io.Externalizable;
import java.io.IOException;
import java.io.ObjectInput;
import java.io.ObjectOutput;

import com.bigdata.btree.MutableNodeData;
import com.bigdata.btree.raba.IRaba;
import com.bigdata.btree.raba.codec.ICodedRaba;
import com.bigdata.btree.raba.codec.IRabaCoder;
import com.bigdata.io.AbstractFixedByteArrayBuffer;
import com.bigdata.io.DataOutputBuffer;
import com.bigdata.rawstore.IRawStore;
import com.bigdata.util.Bytes;
import com.bigdata.util.BytesUtil;

/**
 * Default implementation for immutable {@link INodeData} records.
 * 
 * @author Bryan Thompson
 * @version $Id$
 * 
 * @todo partly mutable coded records for {@link INodeData} are feasible. The
 *       only reason to expand an {@link INodeData} into a fully
 *       {@link MutableNodeData} is if we need to modify the keys. The rest of
 *       the fields could be easily patched in place if they were not coded
 *       (they are fixed length fields). Of course, we have a more compact
 *       representation when those fields ARE coded.
 */
public class DefaultNodeCoder implements IAbstractNodeDataCoder,
        Externalizable {

    /**
     * Serialization version identifier for this {@link Externalizable} class.
     */
    private static final long serialVersionUID = 3998574101917337169L;

    /**
     * The initial version of the serialized representation of the
     * {@link DefaultNodeCoder} class itself (as opposed to the serialized
     * representation of the node or leaf). This is the byte written first by
     * {@link #writeExternal(ObjectOutput)} and the only value accepted by
     * {@link #readExternal(ObjectInput)}.
     */
    private final static transient byte VERSION0 = 0x00;
    
    /**
     * The coder used for the keys of a node. Assigned by
     * {@link #DefaultNodeCoder(IRabaCoder)} or by
     * {@link #readExternal(ObjectInput)}; the de-serialization constructor
     * leaves it <code>null</code>.
     */
    private IRabaCoder keysCoder;

    /**
     * Restores the state written by {@link #writeExternal(ObjectOutput)}: a
     * single version byte followed by the serialized keys coder.
     * 
     * @param in
     *            The stream from which the state is read.
     * 
     * @throws IOException
     *             if the version byte is not a recognized version, or if the
     *             underlying stream reports an error.
     * @throws ClassNotFoundException
     *             if the class of the serialized keys coder can not be
     *             resolved.
     */
    @Override
    public void readExternal(final ObjectInput in) throws IOException,
            ClassNotFoundException {

        final byte version = in.readByte();

        switch (version) {
        case VERSION0:
            break;
        default:
            // Report the offending value so that a corrupt stream (or one
            // written by a newer release) can be diagnosed.
            throw new IOException("Unknown version: " + version);
        }

        keysCoder = (IRabaCoder) in.readObject();

    }

    /**
     * Serializes this coder as a one byte version marker ({@link #VERSION0})
     * followed by the keys coder.
     * 
     * @param out
     *            The stream onto which the state is written.
     * 
     * @throws IOException
     *             if the underlying stream reports an error.
     */
    @Override
    public void writeExternal(final ObjectOutput out) throws IOException {

        // Version marker for the serialized form of this class.
        out.writeByte(VERSION0);

        // The keys coder is written as a serialized object.
        out.writeObject(keysCoder);

    }

    /**
     * Always <code>false</code>: this coder does not report itself as a leaf
     * data coder.
     */
    @Override
    public final boolean isLeafDataCoder() {
        return false;
    }

    /**
     * Always <code>true</code>: this coder reports itself as a node data
     * coder.
     */
    @Override
    public boolean isNodeDataCoder() {
        return true;
    }

    /**
     * Returns the default {@link Object#toString()} representation followed by
     * the configured keys coder.
     */
    @Override
    public String toString() {

        final StringBuilder sb = new StringBuilder(super.toString());

        sb.append("{keysCoder=").append(keysCoder).append("}");

        return sb.toString();

    }
    
    /**
     * De-serialization constructor. The keys coder is left unset here and is
     * assigned by {@link #readExternal(ObjectInput)}.
     */
    public DefaultNodeCoder() {
        // NOP
    }
    
    /**
     * Creates a node coder which delegates the coding of the keys.
     * 
     * @param keysCoder
     *            The {@link IRabaCoder} for the node's keys.
     * 
     * @throws IllegalArgumentException
     *             if the argument is <code>null</code>.
     */
    public DefaultNodeCoder(final IRabaCoder keysCoder) {

        if (keysCoder == null) {
            throw new IllegalArgumentException();
        }

        this.keysCoder = keysCoder;

    }

    /**
     * Wraps a coded node record as a read-only {@link INodeData} view. The
     * keys are decoded using the keys coder configured on this instance.
     * 
     * @param data
     *            The buffer containing the coded node record.
     * 
     * @return The read-only view of that record.
     */
    @Override
    public INodeData decode(final AbstractFixedByteArrayBuffer data) {

        final INodeData view = new ReadOnlyNodeData(data, keysCoder);

        return view;

    }

    /**
     * Codes a node onto the caller's buffer and returns a read-only view which
     * wraps the coded record.
     * <p>
     * The record is written, in order, as: the record type (one byte), the
     * record version (short), the flags (short), the #of keys (int), the #of
     * spanned tuples (int for {@link ReadOnlyNodeData#VERSION0}, otherwise
     * long), the byte length of the coded keys (int), the coded keys, one long
     * address per child, the per-child entry counts, and finally the minimum
     * and maximum version timestamps (two longs) when the node reports version
     * timestamps.
     * 
     * @param node
     *            The node to be coded.
     * @param buf
     *            The buffer onto which the record is written, starting at the
     *            current position of the buffer.
     * 
     * @return A read-only view onto the slice of the buffer containing the
     *         coded record.
     * 
     * @throws IllegalArgumentException
     *             if either argument is <code>null</code> or if no keys coder
     *             has been set on this instance.
     * @throws UnsupportedOperationException
     *             if a count exceeds {@link Integer#MAX_VALUE} when writing a
     *             {@link ReadOnlyNodeData#VERSION0} record.
     * @throws AssertionError
     *             if a child address is {@link IRawStore#NULL} (see #855, Child
     *             identity is not persistent).
     * @throws RuntimeException
     *             if a count is negative or if the per-child entry counts do
     *             not sum to the spanned tuple count.
     */
    @Override
    public INodeData encodeLive(final INodeData node, final DataOutputBuffer buf) {

        if (node == null || keysCoder == null || buf == null) {
            throw new IllegalArgumentException();
        }

        final short version = ReadOnlyNodeData.currentVersion;
        final boolean isVersion0 = version == ReadOnlyNodeData.VERSION0;

        // Cache some fields.
        final int nkeys = node.getKeyCount();
        final long nentries = node.getSpannedTupleCount();

        // Byte offset of the start of the coded record within the buffer.
        final int recordStart = buf.pos();

        buf.putByte(ReadOnlyNodeData.NODE);
        buf.putShort(version);

        final boolean hasVersionTimestamps = node.hasVersionTimestamps();
        final short flags = hasVersionTimestamps //
                ? (short) AbstractReadOnlyNodeData.FLAG_VERSION_TIMESTAMPS //
                : (short) 0;

        buf.putShort(flags);
        buf.putInt(nkeys);

        if (nentries < 0) {
            /*
             * Note: ZERO entries are allowed in order to support some unit
             * tests which use an empty node. However, an empty node is not a
             * legal data structure in a btree. Only the root leaf may be
             * empty.
             */
            throw new RuntimeException();
        }

        if (isVersion0) {
            if (nentries > Integer.MAX_VALUE) {
                throw new UnsupportedOperationException();
            }
            buf.putInt((int) nentries);
        } else {
            buf.putLong(nentries);
        }

        // Reserve the slot in which the byte length of the keys is recorded.
        final int keysSizeOffset = buf.pos();
        buf.skip(ReadOnlyNodeData.SIZEOF_KEYS_SIZE);

        // Write the coded keys onto the buffer.
        final ICodedRaba codedKeys = keysCoder.encodeLive(node.getKeys(), buf);

        // Patch the byte length of the coded keys into the reserved slot.
        buf.putInt(keysSizeOffset, codedKeys.data().len());

        // childAddr[] : @todo code childAddr[] (needs IAddressManager if store
        // aware coding).
        for (int child = 0; child <= nkeys; child++) {

            // See #855 (Child identity is not persistent).
            final long addr = node.getChildAddr(child);

            if (addr == IRawStore.NULL) {
                throw new AssertionError("Child is not persistent: index=" + child
                        + " out of " + nkeys + " entries, " + node.toString());
            }

            buf.putLong(addr);

        }

        // childEntryCount[]
        if (isVersion0) {
            writeFixedWidthChildEntryCounts(node, nkeys, nentries, buf);
        } else {
            writeBitPackedChildEntryCounts(node, nkeys, nentries, buf);
        }

        if (hasVersionTimestamps) {
            buf.putLong(node.getMinimumVersionTimestamp());
            buf.putLong(node.getMaximumVersionTimestamp());
        }

        // Slice onto the coded data record.
        final int recordLength = buf.pos() - recordStart;
        final AbstractFixedByteArrayBuffer record = buf.slice(recordStart,
                recordLength);

        // Read-only coded IDataRecord.
        return new ReadOnlyNodeData(record, codedKeys);

    }

    /**
     * Writes the per-child entry counts as one int per child (the
     * {@link ReadOnlyNodeData#VERSION0} layout) and verifies that they sum to
     * the spanned tuple count.
     */
    private static void writeFixedWidthChildEntryCounts(final INodeData node,
            final int nkeys, final long nentries, final DataOutputBuffer buf) {

        long total = 0;

        for (int child = 0; child <= nkeys; child++) {

            final long count = node.getChildEntryCount(child);

            if (count < 0) {
                throw new AssertionError();
            }

            if (count > Integer.MAX_VALUE) {
                throw new UnsupportedOperationException();
            }

            buf.putInt((int) count);

            total += count;

        }

        if (total != nentries) {
            throw new RuntimeException("spannedTupleCount=" + nentries
                    + ", but sum over children=" + total);
        }

    }

    /**
     * Writes the per-child entry counts using the bit packed layout and
     * verifies that they sum to the spanned tuple count.
     * <p>
     * The minimum is written out as a full length long value. Each count is
     * then written as its delta from the minimum using the minimum #of bits
     * required to code the largest delta. The encoding takes:
     * 
     * <pre>
     * nbits := 1 byte
     * min   := 8 bytes
     * array := BytesUtil.bitFlagByteLength((nkeys + 1) * nbits)
     * </pre>
     * 
     * Note: If min==max then ZERO bits are used per child and no array is
     * written. The encoding is byte aligned so the next data will begin on a
     * byte boundary.
     */
    private static void writeBitPackedChildEntryCounts(final INodeData node,
            final int nkeys, final long nentries, final DataOutputBuffer buf) {

        long min = Long.MAX_VALUE;
        long max = Long.MIN_VALUE;
        long total = 0;

        for (int child = 0; child <= nkeys; child++) {

            final long count = node.getChildEntryCount(child);

            total += count;

            if (count < 0) {
                /*
                 * Note: ZERO is permitted for a test case, but is not legal
                 * in live data.
                 */
                throw new RuntimeException();
            }

            min = Math.min(min, count);
            max = Math.max(max, count);

        }

        if (total != nentries) {
            throw new RuntimeException("spannedTupleCount=" + nentries
                    + ", but sum over children=" + total);
        }

        if (total == 0) {
            min = 0;
            max = 0;
        }

        final long range = max - min;
        assert range >= 0;

        // #of bits per delta. This is ZERO when min == max.
        final byte nbits = (byte) (Fast.mostSignificantBit(range) + 1);

        // one byte.
        buf.putByte(nbits);

        // int64
        buf.putLong(min);

        if (nbits <= 0) {
            // The deltas are coded in zero bits, so there is nothing to write.
            return;
        }

        final int byteLength = BytesUtil.bitFlagByteLength((nkeys + 1) * nbits);
        final byte[] packed = new byte[byteLength];
        final OutputBitStream obs = new OutputBitStream(packed);

        try {

            // array of [nbits] length delta fields.
            for (int child = 0; child <= nkeys; child++) {

                final long delta = node.getChildEntryCount(child) - min;

                assert delta >= 0;

                obs.writeLong(delta, nbits);

            }

            obs.flush();

            // copy onto the buffer.
            buf.put(packed);

        } catch (IOException e) {
            // Note: close is not necessary if flushed and backed by byte[].
            throw new RuntimeException(e);
        }

    }

    /**
     * Codes a node onto the caller's buffer and returns the buffer slice which
     * holds the coded record. This delegates to
     * {@link #encodeLive(INodeData, DataOutputBuffer)} and returns the data of
     * the resulting view.
     * 
     * @param node
     *            The node to be coded.
     * @param buf
     *            The buffer onto which the record is written.
     * 
     * @return The slice of the buffer containing the coded record.
     */
    @Override
    public AbstractFixedByteArrayBuffer encode(final INodeData node,
            final DataOutputBuffer buf) {

        final INodeData coded = encodeLive(node, buf);

        return coded.data();

    }

    /**
     * A read-only view of the data for a B+Tree node.
     * 

* Note: The leading byte of the record format codes for a leaf, a double-linked * leaf or a node in a manner which is compatible with {@link ReadOnlyNodeData}. * * @author Bryan Thompson * @version $Id$ */ static private class ReadOnlyNodeData extends AbstractReadOnlyNodeData implements INodeData { /** The backing buffer */ private final AbstractFixedByteArrayBuffer b; /** The record serialization version. */ private final short version; // fields which are cached by the ctor. private final short flags; private final int nkeys; private final long nentries; /** * Offset of the encoded keys in the buffer. */ private final int O_keys; /** * The coded keys. */ private final ICodedRaba keys; /** * Offset of the encoded childAddr[] in the buffer. */ private final int O_childAddr; /** * Offset of the encoded childEntryCount[] in the buffer. * * TODO Compute at runtime to save space as * O_childAddr + (nkeys + 1) * SIZEOF_ADDR? */ private final int O_childEntryCount; /** The #of bits in the delta encoding of the childEntryCount[]. */ private final byte childEntryCountBits; /** The minimum across the childEntryCount[]. */ private final long minChildEntryCount; final public AbstractFixedByteArrayBuffer data() { return b; } /** * Constructor used when the caller is encoding the {@link INodeData}. * * @param buf * The buffer containing the data for the node. * @param keys The coded keys. 
*/ public ReadOnlyNodeData(final AbstractFixedByteArrayBuffer buf, final ICodedRaba keys) { if (buf == null) throw new IllegalArgumentException(); if (keys == null) throw new IllegalArgumentException(); int pos = O_TYPE; final byte type = buf.getByte(pos); pos += SIZEOF_TYPE; switch (type) { case NODE: break; case LEAF: throw new AssertionError(); case LINKED_LEAF: throw new AssertionError(); default: throw new AssertionError("type=" + type); } version = buf.getShort(pos); pos += SIZEOF_VERSION; switch (version) { case VERSION0: case VERSION1: break; default: throw new AssertionError("version=" + version); } // flags flags = buf.getShort(pos); pos += SIZEOF_FLAGS; this.nkeys = buf.getInt(pos); pos += SIZEOF_NKEYS; if (version == ReadOnlyNodeData.VERSION0) { this.nentries = buf.getInt(pos); pos += Bytes.SIZEOF_INT; } else { this.nentries = buf.getLong(pos); pos += Bytes.SIZEOF_LONG; } if (nentries < 0) { /* * Note: ZERO (0) is permitted for a test case but is not legal * in live data. */ throw new RuntimeException(); } final int keysSize = buf.getInt(pos); pos += SIZEOF_KEYS_SIZE; // O_keys = O_childEntryCount + (nkeys + 1) * SIZEOF_ENTRY_COUNT; O_keys = pos; this.keys = keys;//keysCoder.decode(buf.slice(O_keys, keysSize)); pos += keysSize; // assert b.position() == O_keys + keysSize; if (keys.size() != nkeys) // sanity check. throw new RuntimeException("nkeys=" + nkeys + ", keys.size=" + keys.size()); O_childAddr = pos; O_childEntryCount = O_childAddr + (nkeys + 1) * SIZEOF_ADDR; if (version >= ReadOnlyNodeData.VERSION1) { childEntryCountBits = buf.getByte(O_childEntryCount); } else { // Not used in this version. childEntryCountBits = -1; } minChildEntryCount = buf.getLong(O_childEntryCount + 1); // save reference to buffer this.b = buf; } /** * Decode in place (wraps a buffer containing an encoded node data record). * * @param buf * The buffer containing the data for the node. 
*/ public ReadOnlyNodeData(final AbstractFixedByteArrayBuffer buf, final IRabaCoder keysCoder) { if (buf == null) throw new IllegalArgumentException(); if (keysCoder == null) throw new IllegalArgumentException(); int pos = O_TYPE; final byte type = buf.getByte(pos); pos += SIZEOF_TYPE; switch (type) { case NODE: break; case LEAF: throw new AssertionError(); case LINKED_LEAF: throw new AssertionError(); default: throw new AssertionError("type=" + type); } version = buf.getShort(pos); pos += SIZEOF_VERSION; switch (version) { case VERSION0: case VERSION1: break; default: throw new AssertionError("version=" + version); } // flags flags = buf.getShort(pos); pos += SIZEOF_FLAGS; this.nkeys = buf.getInt(pos); pos += SIZEOF_NKEYS; if (version == ReadOnlyNodeData.VERSION0) { this.nentries = buf.getInt(pos); pos += Bytes.SIZEOF_INT; } else { this.nentries = buf.getLong(pos); pos += Bytes.SIZEOF_LONG; } if (nentries < 0) { /* * Note: ZERO (0) is allowed for a unit test, but it is not * legal in live data. */ throw new RuntimeException(); } final int keysSize = buf.getInt(pos); pos += SIZEOF_KEYS_SIZE; // O_keys = O_childEntryCount + (nkeys + 1) * SIZEOF_ENTRY_COUNT; O_keys = pos; this.keys = keysCoder.decode(buf.slice(O_keys, keysSize)); pos += keysSize; // assert b.position() == O_keys + keysSize; if (keys.size() != nkeys) // sanity check. throw new RuntimeException("nkeys=" + nkeys + ", keys.size=" + keys.size()); O_childAddr = pos; O_childEntryCount = O_childAddr + (nkeys + 1) * SIZEOF_ADDR; if (version >= ReadOnlyNodeData.VERSION1) { childEntryCountBits = buf.getByte(O_childEntryCount); } else { // Not used in this version. childEntryCountBits = -1; } minChildEntryCount = buf.getLong(O_childEntryCount + 1); // save reference to buffer this.b = buf; } /** * The offset into the buffer of the minimum version timestamp, which is * an int64 field. The maximum version timestamp is the next field. 
This * offset is computed dynamically to keep down the size of the node * object in memory. */ private int getVersionTimestampOffset() { if (version == ReadOnlyNodeData.VERSION0) { return O_childEntryCount + ((nkeys + 1) * Bytes.SIZEOF_INT); } else { /* Compute the offset to the version timestamps based on the * #of bits required to encode each entry in the child entry * count array. * * nbits := 1 byte * min := 8 bytes (Long) * array := BytesUtil.bitFlagByteLength((nkeys + 1)* nbits) */ return O_childEntryCount// + 1 // one byte whose value is [nbits] + Bytes.SIZEOF_LONG // min + BytesUtil.bitFlagByteLength((nkeys + 1) * childEntryCountBits); } } @Override final public boolean hasVersionTimestamps() { return ((flags & FLAG_VERSION_TIMESTAMPS) != 0); } @Override final public long getMinimumVersionTimestamp() { if(!hasVersionTimestamps()) throw new UnsupportedOperationException(); final int off = getVersionTimestampOffset(); // at the offset. return b.getLong(off); } @Override final public long getMaximumVersionTimestamp() { if(!hasVersionTimestamps()) throw new UnsupportedOperationException(); final int off = getVersionTimestampOffset() + SIZEOF_TIMESTAMP; // one long value beyond the offset. return b.getLong(off); } /** * Always returns false. */ @Override final public boolean isLeaf() { return false; } /** * Yes. */ @Override final public boolean isReadOnly() { return true; } /** * Yes. */ @Override final public boolean isCoded() { return true; } /** * {@inheritDoc}. This field is cached. */ @Override final public int getKeyCount() { return nkeys; } /** * {@inheritDoc}. This field is cached. */ @Override final public int getChildCount() { return nkeys + 1; } /** * {@inheritDoc}. This field is cached. */ @Override final public long getSpannedTupleCount() { return nentries; } /** * Bounds check. 
* * @throws IndexOutOfBoundsException * if index is LT ZERO (0) * @throws IndexOutOfBoundsException * if index is GT nkeys+1 */ protected boolean assertChildIndex(final int index) { if (index < 0 || index > nkeys + 1) throw new IndexOutOfBoundsException("index=" + index + ", nkeys=" + nkeys); return true; } @Override final public long getChildAddr(final int index) { assert assertChildIndex(index); return b.getLong(O_childAddr + index * SIZEOF_ADDR); } @Override final public long getChildEntryCount(final int index) { assert assertChildIndex(index); if (version == ReadOnlyNodeData.VERSION0) { return b.getInt(O_childEntryCount + index * Bytes.SIZEOF_INT); } else { // Note: O_childEntryCount is [nbits], which is one byte. // final long min = b.getLong(O_childEntryCount + 1/* nbits */); final long bitpos = ((O_childEntryCount + 1 + Bytes.SIZEOF_LONG) << 3) + ((long) index * childEntryCountBits); // if (childEntryCountBits <= 32) { final long bitIndex = (b.off() << 3) + bitpos; final long deltat = BytesUtil.getBits64(b.array(), (int) bitIndex, childEntryCountBits); return minChildEntryCount + deltat; // } // final InputBitStream ibs = b.getInputBitStream(); // try { // // ibs.position(bitpos); // // final long deltat = ibs // .readLong(childEntryCountBits/* nbits */); // // return minChildEntryCount + deltat; // // } catch(IOException ex) { // // throw new RuntimeException(ex); // // // close not required for IBS backed by byte[] and has high overhead. //// } finally { //// try { //// ibs.close(); //// } catch (IOException ex) { //// log.error(ex); //// } // } } } @Override final public IRaba getKeys() { return keys; } @Override public String toString() { final StringBuilder sb = new StringBuilder(); sb.append(getClass().getName() + "{"); DefaultNodeCoder.toString(this, sb); sb.append("}"); return sb.toString(); } } /** * Utility method formats the {@link INodeData}. * * @param node * A node data record. * @param sb * The representation will be written onto this object. 
* * @return The sb parameter. */ static public StringBuilder toString(final INodeData node, final StringBuilder sb) { final int nchildren = node.getChildCount(); sb.append(", nchildren=" + nchildren); sb.append(", spannedTupleCount=" + node.getSpannedTupleCount()); sb.append(",\nkeys=" + node.getKeys()); { sb.append(",\nchildAddr=["); for (int i = 0; i < nchildren; i++) { if (i > 0) sb.append(", "); sb.append(node.getChildAddr(i)); } sb.append("]"); } { sb.append(",\nchildEntryCount=["); for (int i = 0; i < nchildren; i++) { if (i > 0) sb.append(", "); sb.append(node.getChildEntryCount(i)); } sb.append("]"); } if(node.hasVersionTimestamps()) { sb.append(",\nversionTimestamps={min=" + node.getMinimumVersionTimestamp() + ",max=" + node.getMaximumVersionTimestamp() + "}"); } return sb; } } /* * @todo old code from NodeSerializer. */ // private void putChildAddresses(IAddressManager addressManager, // DataOutputBuffer os, INodeData node) throws IOException { // // final int nchildren = node.getChildCount(); // // for (int i = 0; i < nchildren; i++) { // // final long addr = node.getChildAddr(i); // // /* // * Children MUST have assigned persistent identity. // */ // if (addr == 0L) { // // throw new RuntimeException("Child is not persistent: index=" // + i); // // } // // os.writeLong(addr); // // } // // } // // private void getChildAddresses(IAddressManager addressManager, // DataInput is, long[] childAddr, int nchildren) throws IOException { // // for (int i = 0; i < nchildren; i++) { // // final long addr = is.readLong(); // // if (addr == 0L) { // // throw new RuntimeException( // "Child does not have persistent address: index=" + i); // // } // // childAddr[i] = addr; // // } // // } // // /** // * Write out a packed array of the #of entries spanned by each child of some // * node. // * // * @param os // * The output stream. // * @param childEntryCounts // * The #of entries spanned by each direct child. 
// * @param nchildren // * The #of elements of that array that are defined. // * // * @throws IOException // * // * @todo customizable serializer interface configured in // * {@link IndexMetadata}. // */ // static protected void putChildEntryCounts(final DataOutput os, // final INodeData node) throws IOException { // // final int nchildren = node.getChildCount(); // // for (int i = 0; i < nchildren; i++) { // // final long nentries = node.getChildEntryCount(i); // // /* // * Children MUST span some entries. // */ // if (nentries == 0L) { // // throw new RuntimeException( // "Child does not span entries: index=" + i); // // } // // LongPacker.packLong(os, nentries); // // } // // } // // /** // * Read in a packed array of the #of entries spanned by each child of some // * node. // * // * @param is // * @param childEntryCounts // * The #of entries spanned by each direct child. // * @param nchildren // * The #of elements of that array that are defined. // * @throws IOException // * // * @todo customizable serializer interface configured in // * {@link IndexMetadata}. // */ // static protected void getChildEntryCounts(DataInput is, // int[] childEntryCounts, int nchildren) throws IOException { // // for (int i = 0; i < nchildren; i++) { // // final int nentries = (int) LongPacker.unpackLong(is); // // if (nentries == 0L) { // // throw new RuntimeException( // "Child does not span entries: index=" + i); // // } // // childEntryCounts[i] = nentries; // // } // // }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy