// src.it.unimi.dsi.compression.CanonicalFast64CodeWordDecoder Maven / Gradle / Ivy
//
// Go to download
package it.unimi.dsi.compression;

/*
 * DSI utilities
 *
 * Copyright (C) 2005-2017 Sebastiano Vigna
 *
 *  This library is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU Lesser General Public License as published by the Free
 *  Software Foundation; either version 3 of the License, or (at your option)
 *  any later version.
 *
 *  This library is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 *  for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 */

import it.unimi.dsi.fastutil.booleans.BooleanIterator;
import it.unimi.dsi.io.InputBitStream;

import java.io.IOException;
import java.io.Serializable;

/** A fast table-based decoder for canonical Huffman codes whose codewords fit into a <code>long</code>.
 *
 * <p>We use the technique described by Daniel S. Hirschberg and Debra A. Lelewer, “Efficient Decoding of Prefix Codes”,
 * <i>Comm. ACM</i>, 33(4): 449−459, 1990. Codewords are grouped into blocks of equal length; decoding reads
 * just enough bits to reach the length of the next block and compares the value read so far with the
 * first value that does not belong to that block.
 *
 * <p>The constructor rejects codeword lengths greater than 64. NOTE(review): the comparisons performed while
 * decoding are signed, so a code in which the last codeword of a block plus one reaches 2<sup>63</sup>
 * presumably cannot be decoded correctly; confirm before relying on lengths of 63 or 64 bits.
 */

public final class CanonicalFast64CodeWordDecoder implements Decoder, Serializable {
	private static final long serialVersionUID = 1L;

	/** The last codeword in each block of same-length codewords, plus one. */
	private final long[] lastCodeWordPlusOne;
	/** An array parallel to {@link #lastCodeWordPlusOne} specifying the increment in length between codeword lengths
	 * (without repetitions). In particular, the first entry
	 * is the length of the first block of same-length codewords, the second entry is the difference in length
	 * between the second and the first block of same-length codewords, and so on. */
	private final int[] lengthIncrement;
	/** An array parallel to {@link #lastCodeWordPlusOne} specifying how many codewords we have up to a certain block (included). */
	private final int[] howManyUpToBlock;
	/** The symbol assigned to each code word. */
	private final int[] symbol;

	/** Creates a new codeword-based decoder using the given vector of codewords lengths and
	 * a symbol array.
	 *
	 * @param codeWordLength a vector of nondecreasing codeword lengths suitable for a canonical code.
	 * @param symbol a parallel array of symbols corresponding to each codeword length; the array is stored, not copied.
	 * @throws IllegalArgumentException if a codeword length is negative or greater than 64, or if the
	 * lengths are not nondecreasing.
	 */
	public CanonicalFast64CodeWordDecoder(final int[] codeWordLength, final int[] symbol) {
		final int size = codeWordLength.length;
		this.symbol = symbol;

		// We compute how many different codeword lengths are present. We check also for excessive or nondecreasing length.
		int howManyLengths = 1;
		if (size > 0) {
			// The scan below compares adjacent pairs starting from index size - 2, so it never tests the last
			// (and, in a nondecreasing vector, largest) length: we must check it explicitly. Without this
			// check an oversized length would be accepted and the shifts below would silently wrap around,
			// as shifts of a long use only the six lowest bits of the shift distance.
			if (codeWordLength[size - 1] > Long.SIZE) throw new IllegalArgumentException("Codeword length must not exceed 64");
			for(int i = size - 1; i-- != 0;) {
				if (codeWordLength[i] > Long.SIZE) throw new IllegalArgumentException("Codeword length must not exceed 64");
				if (codeWordLength[i] > codeWordLength[i + 1]) throw new IllegalArgumentException("Codeword lengths must be nondecreasing");
				if (codeWordLength[i] != codeWordLength[i + 1]) howManyLengths++;
			}
			// Lengths are nondecreasing at this point, so only the first one can be negative.
			if (codeWordLength[0] < 0) throw new IllegalArgumentException("Codeword lengths must be nonnegative");
		}

		lengthIncrement = new int[howManyLengths];
		howManyUpToBlock = new int[howManyLengths];
		lastCodeWordPlusOne = new long[howManyLengths];

		// p is the index of the block being filled (-1 until the first block is opened), prevL the length
		// of the previous codeword and word the value of the next codeword to be assigned.
		int p = -1, l, prevL = 0;
		long word = 0;

		for(int i = 0; i < size; i++) {
			l = codeWordLength[i];
			if (l != prevL) {
				// A new block starts here: we close the previous one, if any...
				if (i != 0) {
					lastCodeWordPlusOne[p] = word;
					howManyUpToBlock[p] = i;
				}
				// ...and extend the current codeword to the new length.
				lengthIncrement[++p] = l - prevL;
				word <<= l - prevL;
				prevL = l;
			}

			word++;
		}

		if (p != -1) {
			// We close the last block.
			howManyUpToBlock[p] = size;
			lastCodeWordPlusOne[howManyLengths - 1] = word;
		}
		else {
			// No block was ever opened, which happens only when every length is zero (in particular,
			// when the vector is empty): the single zero-length block maps the value 0 to symbol[0].
			howManyUpToBlock[0] = 1;
			lastCodeWordPlusOne[0] = 1;
		}
	}

	/** Reads a specified number of bits from a Boolean iterator and stores them into a long.
	 *
	 * @param iterator a Boolean iterator.
	 * @param length the number of bits to read.
	 * @return the bits read, stored into a long: the first read bit will be bit length − 1.
	 */
	private static long readLong(final BooleanIterator iterator, final int length) {
		long x = 0;
		for(int i = length; i-- != 0;) if (iterator.nextBoolean()) x |= 1L << i;
		return x;
	}

	/** Decodes one symbol, reading the bits of its codeword from a Boolean iterator.
	 *
	 * @param iterator a Boolean iterator returning the bits of a codeword, most significant bit first.
	 * @return the symbol associated with the codeword read.
	 */
	@Override
	public int decode(final BooleanIterator iterator) {
		// Local copies of the fields used in the loop.
		final int[] lengthIncrement = this.lengthIncrement;
		final long[] lastCodeWordPlusOne = this.lastCodeWordPlusOne;
		int curr = 0, l;
		long x;

		x = readLong(iterator, lengthIncrement[curr]);

		for(;;) {
			// If x falls within the current block, its offset from the end of the block locates the symbol.
			if (x < lastCodeWordPlusOne[curr]) return symbol[(int)(howManyUpToBlock[curr] - lastCodeWordPlusOne[curr] + x)];
			// Otherwise, we extend x to the length of the next block.
			l = lengthIncrement[++curr];
			x = x << l | readLong(iterator, l);
		}
	}

	/** Decodes one symbol, reading the bits of its codeword from an input bit stream.
	 *
	 * @param ibs an input bit stream positioned at the start of a codeword.
	 * @return the symbol associated with the codeword read.
	 * @throws IOException if the underlying stream throws it while reading bits.
	 */
	@Override
	public int decode(final InputBitStream ibs) throws IOException {
		// Local copies of the fields used in the loop.
		final int[] lengthIncrement = this.lengthIncrement;
		final long[] lastCodeWordPlusOne = this.lastCodeWordPlusOne;
		int curr = 0, l;
		long x;

		x = ibs.readLong(lengthIncrement[curr]);

		for(;;) {
			// If x falls within the current block, its offset from the end of the block locates the symbol.
			if (x < lastCodeWordPlusOne[curr]) return symbol[(int)(howManyUpToBlock[curr] - lastCodeWordPlusOne[curr] + x)];
			// Otherwise, we extend x to the length of the next block; single-bit increments use readBit().
			l = lengthIncrement[++curr];
			if (l == 1) x = x << 1 | ibs.readBit();
			else x = x << l | ibs.readLong(l);
		}
	}
}