// com.bigdata.htree.NodeSerializer (Maven / Gradle / Ivy)
//
// Go to download
/**

Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     licenses@blazegraph.com

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
/*
 * Created on Nov 5, 2006
 */
package com.bigdata.htree;

import java.nio.BufferUnderflowException;
import java.nio.ByteBuffer;

import com.bigdata.BigdataStatics;
import com.bigdata.btree.IndexMetadata;
import com.bigdata.btree.data.AbstractReadOnlyNodeData;
import com.bigdata.btree.data.DefaultLeafCoder;
import com.bigdata.btree.data.IAbstractNodeData;
import com.bigdata.btree.data.IAbstractNodeDataCoder;
import com.bigdata.btree.data.ILeafData;
import com.bigdata.btree.data.INodeData;
import com.bigdata.htree.data.DefaultDirectoryPageCoder;
import com.bigdata.htree.data.IDirectoryData;
import com.bigdata.io.AbstractFixedByteArrayBuffer;
import com.bigdata.io.DataOutputBuffer;
import com.bigdata.io.FixedByteArrayBuffer;
import com.bigdata.io.IDataRecord;
import com.bigdata.io.compression.IRecordCompressor;
import com.bigdata.io.compression.IRecordCompressorFactory;
import com.bigdata.io.compression.NOPRecordCompressor;
import com.bigdata.rawstore.IAddressManager;
import com.bigdata.rawstore.IRawStore;
import com.bigdata.util.Bytes;

/**
 * 
 * An instance of this class is used to serialize and de-serialize the
 * {@link IDirectoryData}s and {@link ILeafData}s of an {@link AbstractHTree}. Leaf
 * and non-leaf records have different serialization formats, but their leading
 * bytes use the same format so that you can tell by inspection whether a buffer
 * contains a leaf or a non-leaf node. The header of the record uses a fixed
 * length format so that some fields can be tested without full
 * de-serialization, especially whether the record contains a leaf vs a node.
 * This fixed record also makes it possible to update some fields in the header
 * once the entire record has been serialized, including the checksum, the #of
 * bytes in the serialized record, and the prior/next addresses for leaves.
 * 
 * 
 * The methods defined by this class all work with {@link ByteBuffer}s. On read,
 * the buffer must be positioned to the start of the data to be read. After a
 * read, the buffer will be positioned to the first byte after the data read. If
 * there is insufficient data available in the buffer then an
 * {@link BufferUnderflowException} will be thrown. On write, the data will be
 * written on an internal buffer whose size is automatically extended. The write
 * buffer is reused for each write and quickly achieves a maximum size for any
 * given {@link HTree}.
 * 
 * 
 * Note: while the {@link NodeSerializer} is NOT thread-safe for writers, it is
 * thread-safe for readers. This design mirrors the concurrency capabilities of
 * the {@link AbstractHTree}.
 * 
 * 
 * @author Bryan Thompson
 * @version $Id: NodeSerializer.java 3305 2010-07-27 15:08:28Z thompsonbry $
 * 
 * @see AbstractHTree
 * @see IndexMetadata
 * @see IAbstractNodeData
 * @see IAbstractNodeDataCoder
 */
public class NodeSerializer {

	/**
	 * An object that knows how to construct {@link DirectoryPage}s and
	 * {@link BucketPage}s from {@link IDirectoryData} and {@link ILeafData}
	 * objects. It is used by {@link #wrap(AbstractHTree, long, IAbstractNodeData)}.
	 */
	protected final INodeFactory nodeFactory;

    /**
     * When <code>true</code> the {@link NodeSerializer} instance will refuse to
     * {@link #encode(IAbstractNodeData)} nodes or leaves (this keeps us from
     * allocating the {@link #_writeBuffer}). Note that this MUST be
     * <code>false</code> for a transient index so we can convert its nodes and
     * leaves into coded data records as they are evicted from the write
     * retention queue.
     */
    private final boolean readOnly;

    /**
     * Used to code the non-leaf records (the directory pages).
     */
    final IAbstractNodeDataCoder nodeCoder;
    
    /**
     * Used to code the leaves.
     */
    final IAbstractNodeDataCoder leafCoder;
    
    /**
     * Factory for record-level (de-)compression of nodes and leaves
     * (optional, MAY be <code>null</code>).
     */
    private final IRecordCompressorFactory recordCompressorFactory;
    
    /**
     * Return the object used to (de-)compress a node or leaf record.
     * <p>
     * When no {@link IRecordCompressorFactory} was configured this is the
     * {@link NOPRecordCompressor}. Otherwise a read-only serializer asks the
     * factory for an instance on every call, while a writable serializer hands
     * back the {@link #_writeCompressor} it is holding (writes are single
     * threaded, so that instance can be shared across calls).
     * 
     * @return The record compressor.
     */
    private IRecordCompressor getRecordCompressor() {

        final IRecordCompressor compressor;

        if (recordCompressorFactory == null) {

            // Compression was not configured.
            compressor = NOPRecordCompressor.INSTANCE;

        } else if (readOnly) {

            // Readers obtain their instance from the factory.
            compressor = recordCompressorFactory.getInstance();

        } else {

            assert _writeCompressor != null;

            // The writer's own instance.
            compressor = _writeCompressor;

        }

        return compressor;

    }

    /**
     * Used to serialize the nodes and leaves of the tree. This is allocated by
     * {@link #allocWriteBuffer()} using {@link #initialBufferCapacity} and
     * grows as necessary when it overflows. The same buffer instance is reused
     * when serializing the nodes and leaves of the tree.
     * <p>
     * Note: this buffer is discarded by {@link #close()} and then reallocated
     * on demand by the next encode request.
     * <p>
     * Note: It is important that this field NOT be used for a read-only
     * index since only mutable indices are single threaded - concurrent
     * readers are allowed for read-only indices.
     * 
     * @see #allocWriteBuffer()
     * @see #close()
     */
    private DataOutputBuffer _writeBuffer;

    /**
     * Instance used for writes - this keeps a hard reference since writes are
     * single threaded. It is assigned by {@link #allocWriteBuffer()} and
     * cleared by {@link #close()}.
     */
    private IRecordCompressor _writeCompressor;
    
    /**
     * The capacity with which {@link #_writeBuffer} is allocated. This is zero
     * for a read-only instance; otherwise it is the non-zero value given to the
     * constructor or, when zero was given, the computed default.
     */
    private final int initialBufferCapacity;

    /**
     * The default initial capacity multiplier for the (de-)serialization buffer.
     * The total initial buffer capacity is this value times the
     * effective branching factor, which is computed from the addressBits
     * constructor parameter as <code>1 &lt;&lt; addressBits</code>.
     */
    public static final transient int DEFAULT_BUFFER_CAPACITY_PER_ENTRY = Bytes.kilobyte32 / 4;
    
    /**
     * Not supported - instances must be created using the designated
     * constructor.
     * 
     * @throws UnsupportedOperationException
     *             always.
     */
    @SuppressWarnings("unused")
    private NodeSerializer() {
        throw new UnsupportedOperationException();
    }

    /**
     * Designated constructor.
     * 
     * @param addressManager
     *            Not referenced by this constructor.
     * 
     * @param nodeFactory
     *            An object that knows how to construct the pages which wrap
     *            the {@link IDirectoryData} and {@link ILeafData leaf} data
     *            records.
     * 
     * @param addressBits
     *            The #of address bits for target {@link HTree}. This is only
     *            consulted when the default buffer capacity is computed.
     * 
     * @param initialBufferCapacity
     *            The initial capacity for internal buffer used to serialize
     *            nodes and leaves. The buffer will be resized as necessary
     *            until it is sufficient for the records being serialized for
     *            the {@link HTree}. When zero (0), a default is used, which is
     *            {@link #DEFAULT_BUFFER_CAPACITY_PER_ENTRY} times
     *            <code>1 &lt;&lt; addressBits</code>. The value is ignored
     *            when <i>readOnly</i> is <code>true</code>.
     * 
     * @param indexMetadata
     *            The {@link IndexMetadata} record for the index. Its tuple
     *            serializer supplies the leaf keys and leaf values coders.
     * 
     * @param readOnly
     *            <code>true</code> IFF the caller is asserting that they WILL
     *            NOT attempt to serialize any nodes or leaves using this
     *            {@link NodeSerializer} instance. No write buffer is allocated
     *            in that case.
     * 
     * @param recordCompressorFactory
     *            Factory for record-level (de-)compression (optional, MAY be
     *            <code>null</code>).
     * 
     *            FIXME {@link IRecordCompressorFactory} should either be used
     *            here or moved into the {@link IRawStore} impl.
     * 
     * @throws IllegalArgumentException
     *             if the leaf keys coder does not allow duplicate keys.
     * 
     * @todo the {@link IAddressManager} is not used any more. It was used by
     *       the {@link IAddressSerializer}.
     */
    public NodeSerializer(//
            final IAddressManager addressManager,
            final INodeFactory nodeFactory,//
            final int addressBits,//
            final int initialBufferCapacity, //
            final IndexMetadata indexMetadata,//
            final boolean readOnly,//
            final IRecordCompressorFactory recordCompressorFactory
            ) {

        assert nodeFactory != null;
        assert initialBufferCapacity >= 0;
        assert indexMetadata != null;

        this.nodeFactory = nodeFactory;
        this.readOnly = readOnly;

        /*
         * Directory pages get their own coder since they have little in
         * common with a BTree non-leaf node: there are no *keys* in a
         * DirectoryPage and no spanned entry counts or other metadata are
         * currently stored. Right now, the only information in a
         * DirectoryPage is the address map.
         * 
         * @todo parameter for the node/leaf coder impls or the NodeSerializer?
         */
        this.nodeCoder = new DefaultDirectoryPageCoder();

        /*
         * This constraint *could* be relaxed, but the HTree API presumes that
         * we can have duplicate keys, so verify that the keys coder supports
         * them.
         */
        final boolean duplicateKeysAllowed = indexMetadata
                .getTupleSerializer().getLeafKeysCoder().isDuplicateKeys();

        if (!duplicateKeysAllowed) {

            throw new IllegalArgumentException(
                    "The leaf keys coder for HTree should allow duplicate keys.");

        }

        // Note: this is the same leaf coder class which the BTree uses.
        this.leafCoder = new DefaultLeafCoder(//
                indexMetadata.getTupleSerializer().getLeafKeysCoder(),//
                indexMetadata.getTupleSerializer().getLeafValuesCoder());

        // MAY be null
        this.recordCompressorFactory = recordCompressorFactory;

        if (!readOnly) {

            // Use the caller's capacity unless zero, else the default, which
            // is scaled by the effective branching factor.
            this.initialBufferCapacity = (initialBufferCapacity != 0) //
                    ? initialBufferCapacity //
                    : DEFAULT_BUFFER_CAPACITY_PER_ENTRY * (1 << addressBits);

            // allocate initial write buffer.
            allocWriteBuffer();

        } else {

            // A read-only instance never allocates a write buffer.
            this.initialBufferCapacity = 0;

            this._writeBuffer = null;

        }

    }

    /**
     * Drops the references to the write buffer and to the compressor used for
     * writes. Both are allocated again by the next encode request made against
     * this {@link NodeSerializer}.
     */
    public void close() {

        _writeCompressor = null;

        _writeBuffer = null;

    }

    /**
     * Allocates {@link #_writeBuffer} with {@link #initialBufferCapacity} and
     * assigns {@link #_writeCompressor}, which is taken from the
     * {@link IRecordCompressorFactory} when one was configured and is the
     * {@link NOPRecordCompressor} otherwise.
     * 
     * @throws UnsupportedOperationException
     *             if the {@link NodeSerializer} does not permit writes.
     */
    private void allocWriteBuffer() {

        if (readOnly) {
            throw new UnsupportedOperationException();
        }

        // Neither may be allocated already.
        assert _writeBuffer == null;
        assert _writeCompressor == null;

        _writeBuffer = new DataOutputBuffer(initialBufferCapacity);

        if (recordCompressorFactory != null) {

            _writeCompressor = recordCompressorFactory.getInstance();

        } else {

            _writeCompressor = NOPRecordCompressor.INSTANCE;

        }

    }

    /**
     * Decode a node or leaf record, wrapping the underlying data record. The
     * type byte found at {@link AbstractReadOnlyNodeData#O_TYPE} decides
     * whether the record is handed to the {@link #nodeCoder} or to the
     * {@link #leafCoder}. This method does not use the write buffer.
     * <p>
     * When the buffer exposes its backing array, that array is wrapped
     * starting at the buffer's array offset and spanning the buffer's
     * capacity, and the position of the buffer is not changed. Otherwise the
     * remaining bytes are copied into a new byte[], which advances the
     * position of the caller's buffer to its limit.
     * <p>
     * NOTE(review): the type byte is read at an absolute index and the wrapped
     * slice ignores the position and limit, so this looks like it expects a
     * buffer whose position is zero and whose limit is its capacity - confirm
     * against the callers.
     * 
     * @param buf
     *            The data record.
     * 
     * @return The decoded node or leaf data for that data record.
     * 
     * @throws IllegalArgumentException
     *             if the argument is <code>null</code>.
     * 
     *             FIXME modify to accept {@link IDataRecord} rather than
     *             {@link ByteBuffer}.
     */
    public IAbstractNodeData decode(final ByteBuffer buf) {

        if (buf == null) {
            throw new IllegalArgumentException();
        }

        // Absolute read: does not move the position of the buffer.
        final byte type = buf.get(AbstractReadOnlyNodeData.O_TYPE);

        final boolean isNode = AbstractReadOnlyNodeData.isNode(type);

        // FIXME should be done at the store level during decompress.
        final AbstractFixedByteArrayBuffer record;

        if (buf.hasArray()) {

            // backing array is accessible, so wrap as slice.
            record = new FixedByteArrayBuffer(buf.array(), buf.arrayOffset(),
                    buf.capacity());

        } else {

            // backing array is not accessible, so copy into new byte[].
            final byte[] copy = new byte[buf.remaining()];

            buf.get(copy);

            record = FixedByteArrayBuffer.wrap(copy);

            if (BigdataStatics.debug) {
                System.err.print("[RO]");
            }

        }

        if (!isNode) {

            return leafCoder.decode(record);

        }

        return nodeCoder.decode(record);

    }

    /**
     * Wrap a decoded data record as a page using the {@link #nodeFactory}. A
     * record which reports that it is a leaf is passed to the factory as an
     * {@link ILeafData}; any other record is passed as an
     * {@link IDirectoryData}. This DOES NOT set the parent of the new page.
     * 
     * @param btree
     *            The owning index.
     * @param addr
     *            The address of the data record in the backing store.
     * @param data
     *            The data record.
     * 
     * @return The page wrapping that data record.
     */
    public AbstractPage wrap(final AbstractHTree btree, final long addr,
            final IAbstractNodeData data) {

        // Note: neither [btree] nor [addr] is checked here.

        if (!data.isLeaf()) {

            // wrap data record as a directory page.
            return nodeFactory.allocNode(btree, addr, (IDirectoryData) data);

        }

        // wrap data record as a leaf.
        return nodeFactory.allocLeaf(btree, addr, (ILeafData) data);

    }

    /**
     * Encode a node or leaf and return a coded instance of the persistent data
     * for that node or leaf backed by an exact fit byte[] (NOT thread-safe).
     * The operation writes on an internal buffer which is automatically
     * extended as required. That buffer is allocated on demand if it was
     * released by {@link #close()}.
     * 
     * @param <T>
     *            The type of the node or leaf data. The coded instance is
     *            returned as the same type.
     * @param node
     *            The node or leaf.
     * 
     * @return The coded data record for the node or leaf, backed by an exact
     *         fit byte[] owned by the coded node or leaf.
     * 
     * @throws IllegalArgumentException
     *             if the argument is <code>null</code>.
     * @throws IllegalStateException
     *             if the node or leaf is already coded.
     * @throws UnsupportedOperationException
     *             if the write buffer must be allocated and this
     *             {@link NodeSerializer} does not permit writes.
     */
    @SuppressWarnings("unchecked")
    public <T extends IAbstractNodeData> T encodeLive(final T node) {

        if (node == null)
            throw new IllegalArgumentException();

        if (node.isCoded()) {
        
            // already coded.
            throw new IllegalStateException();

        }
        
        if (_writeBuffer == null) {
         
            // re-allocate.
            allocWriteBuffer();
            
        } else {
        
            // reuse the existing buffer for this record.
            _writeBuffer.reset();
        
        }

        // The casts back to T are unchecked since the coder fields are raw
        // types.
        final T codedNode;
        if(node.isLeaf()) {

            codedNode = (T) leafCoder
                    .encodeLive((ILeafData) node, _writeBuffer);

        } else {

            codedNode = (T) nodeCoder
                    .encodeLive((IDirectoryData) node, _writeBuffer);

        }

        /*
         * Trim the backing byte[] buffer to an exact fit. All of the slice()s
         * based on that buffer will reference the new backing byte[]. trim()
         * returns the _old_ backing byte[], which we then wrap and use to
         * replace the write buffer. This allows the write buffer to grow until
         * it "fits" the coded data records, while preserving exact fit byte[]s
         * for each coded node or leaf whose slices for the coded keys and
         * values remain tied to the exact fit byte[].
         */
        _writeBuffer = new DataOutputBuffer(0/* len */, _writeBuffer.trim());

//        if (node.isLeaf() && leafCoder instanceof CanonicalHuffmanRabaCoder) {
//            // hack updates reference into the backing byte[]. 
//            ((CanonicalHuffmanRabaCoder.CodedRabaImpl) ((ILeafData) codedNode)
//                    .getValues()).trimmedSlice();
//        }
        
        // Return the coded node or leaf.
        return codedNode;

    }

    /**
     * Encode a node or leaf onto the internal write buffer and return the
     * coded record (NOT thread-safe). Unlike {@link #encodeLive(IAbstractNodeData)}
     * the write buffer is not trimmed or replaced afterwards, so the result
     * shares that buffer. The write buffer is allocated on demand if it was
     * released by {@link #close()} and is reset before each use otherwise.
     * 
     * @param node
     *            The node or leaf.
     * 
     * @return The buffer containing the coded data record for the node or leaf
     *         in a shared buffer. The contents of this buffer may be
     *         overwritten by the next node or leaf serialized by the same
     *         instance of this class.
     * 
     * @throws IllegalArgumentException
     *             if the argument is <code>null</code>.
     * @throws IllegalStateException
     *             if the node or leaf is already coded.
     * @throws UnsupportedOperationException
     *             if the write buffer must be allocated and this
     *             {@link NodeSerializer} does not permit writes.
     * 
     * @deprecated This method is no longer used since I refactored the
     *             {@link IndexSegmentBuilder} to optionally stuff the generated
     *             nodes and leaves into the cache. It still works but it might
     *             go away in the future.
     */
    public AbstractFixedByteArrayBuffer encode(final IAbstractNodeData node) {

        if (node == null) {
            throw new IllegalArgumentException();
        }

        if (node.isCoded()) {
            // already coded.
            throw new IllegalStateException();
        }

        if (_writeBuffer != null) {

            // reuse the existing buffer.
            _writeBuffer.reset();

        } else {

            // re-allocate.
            allocWriteBuffer();

        }

        if (!node.isLeaf()) {

            return nodeCoder.encode((IDirectoryData) node, _writeBuffer);

        }

        return leafCoder.encode((ILeafData) node, _writeBuffer);

    }

    /**
     * Touch up the serialized record of a leaf by writing the prior and next
     * leaf addresses at {@link AbstractReadOnlyNodeData#O_PRIOR} and
     * {@link AbstractReadOnlyNodeData#O_NEXT}.
     * <p>
     * NOTE(review): earlier documentation described this method as also
     * changing the serialization type of the record from leaf to linked-leaf.
     * The method only writes the two addresses; it does not modify the type
     * byte, so the record presumably must already have room for them -
     * confirm against {@link DefaultLeafCoder}.
     * <p>
     * Note: In order to use this method to write linked leaves on the store you
     * have to either write behind at a pre-determined address on the store or
     * settle for writing only the prior or the next leaf address, but not both.
     * It is up to the caller to perform these tricks.
     * <p>
     * Note: This method has NO side-effects on the <i>position</i> or
     * <i>limit</i> of the caller's {@link ByteBuffer} since only absolute
     * reads and writes are used.
     * 
     * @param b
     *            The serialized leaf.
     * @param priorAddr
     *            The address of the previous leaf in key order, <code>0L</code>
     *            if it is known that there is no previous leaf, and
     *            <code>-1L</code> if either: (a) it is not known whether there
     *            is a previous leaf; or (b) it is known but the address of that
     *            leaf is not known to the caller.
     * @param nextAddr
     *            The address of the next leaf in key order, <code>0L</code> if
     *            it is known that there is no next leaf, and <code>-1L</code>
     *            if either: (a) it is not known whether there is a next leaf;
     *            or (b) it is known but the address of that leaf is not known
     *            to the caller.
     * 
     * @throws UnsupportedOperationException
     *             if the type byte of the record identifies a node.
     * 
     * @see IndexSegmentBuilder
     * @see DefaultLeafCoder
     */
    public void updateLeaf(final ByteBuffer b, final long priorAddr,
            final long nextAddr) {

        final byte type = b.get(AbstractReadOnlyNodeData.O_TYPE);

        if (AbstractReadOnlyNodeData.isNode(type)) {

            throw new UnsupportedOperationException("Not a leaf.");

        }

        // Absolute writes: position and limit are left alone.
        b.putLong(AbstractReadOnlyNodeData.O_PRIOR, priorAddr);

        b.putLong(AbstractReadOnlyNodeData.O_NEXT, nextAddr);

    }

}