it.unimi.dsi.compression.CanonicalFast64CodeWordDecoder Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of dsi-utils Show documentation
Blazegraph Modifications to the DSI utils. This are forked from version 1.10.0 under LGPLv2.1.
There is a newer version: 2.1.4
package it.unimi.dsi.compression;

/*       
 * DSI utilities
 *
 * Copyright (C) 2005-2009 Sebastiano Vigna 
 *
 *  This library is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU Lesser General Public License as published by the Free
 *  Software Foundation; either version 2.1 of the License, or (at your option)
 *  any later version.
 *
 *  This library is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 *  for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 */

import it.unimi.dsi.fastutil.booleans.BooleanIterator;
import it.unimi.dsi.io.InputBitStream;

import java.io.IOException;
import java.io.Serializable;

/** A fast table-based decoder for canonical Huffman codes supporting only codes with limited (less than 64 bits) codewords. 
 * We use the technique described by Daniel S. Hirschberg and Debra A. Lelewer, “Efficient Decoding of Prefix Codes”, 
 * Comm. ACM, 33(4): 449−459, 1990. */

final public class CanonicalFast64CodeWordDecoder implements Decoder, Serializable {
    private static final long serialVersionUID = 1L;
    
    /** The last codeword in each block of same-length codewords, plus one. */
    private final long[] lastCodeWordPlusOne;
    /** An array parallel to {@link #lastCodeWordPlusOne} specifying the increment in length between codeword lengths 
     * (without repetitions). In particular, the first entry
     * is the length of the first block of same-length codewords, the second entry is the difference in length
     * between the second and the first block of same-length codewords, and so on. */
    private final int[] lengthIncrement;
    /** An array parallel to {@link #lastCodeWordPlusOne} specifying how many codewords we have up to a certain block (included). */
    private final int[] howManyUpToBlock;
    /** The symbol assigned to each code word. */
    private final int[] symbol;
        
    /** Creates a new codeword-based decoder using the given vector of codewords lengths and
     * a symbol array.
     * 
     * @param codeWordLength a vector of nondecreasing codeword lengths suitable for a canonical code.
     * @param symbol a parallel array of symbols corresponding to each codeword length.
     */
    public CanonicalFast64CodeWordDecoder( final int[] codeWordLength, final int[] symbol ) {
        final int size = codeWordLength.length;
        this.symbol = symbol;
        
        // We compute how many different codeword lengths are present. We check also for excessive or nondecreasing length.
        int howManyLengths = 1;
        if ( size > 0 )
            for( int i = size - 1; i-- != 0; ) {
                if ( codeWordLength[ i ] > Long.SIZE ) throw new IllegalArgumentException( "Codeword length must not exceed 64" );
                if ( codeWordLength[ i ] > codeWordLength[ i + 1 ] ) throw new IllegalArgumentException( "Codeword lengths must be nondecreasing" );
                if ( codeWordLength[ i ] != codeWordLength[ i + 1 ] ) howManyLengths++; 
            }
        
        lengthIncrement = new int[ howManyLengths ];
        howManyUpToBlock = new int[ howManyLengths ];
        lastCodeWordPlusOne = new long[ howManyLengths ];
        
        int p = -1, l, prevL = 0;
        long word = 0;
        
        for( int i = 0; i < size; i++ ) {
            l = codeWordLength[ i ];
            if ( l != prevL ) {
                if ( i != 0 ) {
                    lastCodeWordPlusOne[ p ] = word;
                    howManyUpToBlock[ p ] = i; 
                }
                lengthIncrement[ ++p ] = l - prevL;
                word <<= l - prevL;
                prevL = l;
            }
            
            word++;
        }
        
        if ( p != -1 ) {
            howManyUpToBlock[ p ] = size;
            lastCodeWordPlusOne[ howManyLengths - 1 ] = word;
        }
        else {
             // This covers the case size = 1
            howManyUpToBlock[ 0 ] = 1;
            lastCodeWordPlusOne[ 0 ] = 1;
        }
    }

    /** Reads a specified number of bits from a Boolean iterator and stores them into a long.
     * 
     * @param iterator a Boolean iterator.
     * @param length the number of bits to read.
     * @return the bits read, stored into a long: the first read bit will be bit length − 1.
     */
    private static long readLong( final BooleanIterator iterator, final int length ) {
        long x = 0;
        for( int i = length; i-- != 0; ) if ( iterator.nextBoolean() ) x |= 1L << i;
        return x;
    }
    
    public int decode( final BooleanIterator iterator ) {
        final int[] lengthIncrement = this.lengthIncrement;
        final long[] lastCodeWordPlusOne = this.lastCodeWordPlusOne;
        int curr = 0, l; 
        long x;

        x = readLong( iterator, lengthIncrement[ curr ] );
        
        for(;;) {
            if ( x < lastCodeWordPlusOne[ curr ] ) return symbol[ (int)( howManyUpToBlock[ curr ] - lastCodeWordPlusOne[ curr ] + x ) ];
            l = lengthIncrement[ ++curr ];
            x = x << l | readLong( iterator, l );
        }
    }

    public int decode( final InputBitStream ibs ) throws IOException {
        final int[] lengthIncrement = this.lengthIncrement;
        final long[] lastCodeWordPlusOne = this.lastCodeWordPlusOne;
        int curr = 0, l; 
        long x;

        x = ibs.readLong( lengthIncrement[ curr ] );
        
        for(;;) {
            if ( x < lastCodeWordPlusOne[ curr ] ) return symbol[ (int)( howManyUpToBlock[ curr ] - lastCodeWordPlusOne[ curr ] + x ) ];
            l = lengthIncrement[ ++curr ];
            if ( l == 1 ) x = x << 1 | ibs.readBit();
            else x = x << l | ibs.readLong( l );
        }
    }
}