src.it.unimi.dsi.compression.CanonicalFast64CodeWordDecoder Maven / Gradle / Ivy

Go to download

Show more of this group Show more artifacts with this name
Show all versions of dsiutils Show documentation

The DSI utilities are a mishmash of classes accumulated during the last twenty years in projects developed at the DSI (Dipartimento di Scienze dell'Informazione, i.e., Information Sciences Department), now DI (Dipartimento di Informatica, i.e., Informatics Department), of the Università degli Studi di Milano.

There is a newer version: 2.7.3

Show newest version

package it.unimi.dsi.compression;

/*
 * DSI utilities
 *
 * Copyright (C) 2005-2018 Sebastiano Vigna
 *
 *  This library is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU Lesser General Public License as published by the Free
 *  Software Foundation; either version 3 of the License, or (at your option)
 *  any later version.
 *
 *  This library is distributed in the hope that it will be useful, but
 *  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 *  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
 *  for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 *
 */

import it.unimi.dsi.fastutil.booleans.BooleanIterator;
import it.unimi.dsi.io.InputBitStream;

import java.io.IOException;
import java.io.Serializable;

/** A fast table-based decoder for canonical Huffman codes supporting only codes with limited (less than 64 bits) codewords.
 * We use the technique described by Daniel S. Hirschberg and Debra A. Lelewer, “Efficient Decoding of Prefix Codes”,
 * Comm. ACM, 33(4): 449−459, 1990.
 *
 * <p>Codewords are grouped into blocks of equal length. For each block the decoder stores
 * the numerical value of its last codeword plus one, the number of bits that must be read
 * to reach that block's length from the previous one, and the number of codewords contained in all blocks
 * up to that one. Decoding reads bits block by block and stops as soon as the value accumulated so far
 * is smaller than the bound of the current block.
 *
 * <p>The constructor rejects codeword lengths that are negative, decreasing or greater than {@link Long#SIZE}.
 */
public final class CanonicalFast64CodeWordDecoder implements Decoder, Serializable {
	private static final long serialVersionUID = 1L;

	/** The last codeword in each block of same-length codewords, plus one. */
	private final long[] lastCodeWordPlusOne;
	/** An array parallel to {@link #lastCodeWordPlusOne} specifying the increment in length between codeword lengths
	 * (without repetitions). In particular, the first entry
	 * is the length of the first block of same-length codewords, the second entry is the difference in length
	 * between the second and the first block of same-length codewords, and so on. */
	private final int[] lengthIncrement;
	/** An array parallel to {@link #lastCodeWordPlusOne} specifying how many codewords we have up to a certain block (included). */
	private final int[] howManyUpToBlock;
	/** The symbol assigned to each code word. */
	private final int[] symbol;

	/** Creates a new codeword-based decoder using the given vector of codewords lengths and
	 * a symbol array.
	 *
	 * <p>The symbol array is stored as is (it is not copied).
	 *
	 * @param codeWordLength a vector of nondecreasing codeword lengths suitable for a canonical code.
	 * @param symbol a parallel array of symbols corresponding to each codeword length.
	 * @throws IllegalArgumentException if a codeword length is negative or greater than 64, or
	 * if the lengths are not nondecreasing.
	 */
	public CanonicalFast64CodeWordDecoder(final int[] codeWordLength, final int[] symbol) {
		final int size = codeWordLength.length;
		this.symbol = symbol;

		// We compute how many different codeword lengths are present. We check also for excessive, negative or decreasing lengths.
		int howManyLengths = 1;
		if (size > 0) {
			// The backward scan below examines only the entries from size - 2 down to 0, so the last entry
			// (which is the largest one when lengths are nondecreasing) must be checked explicitly.
			if (codeWordLength[size - 1] > Long.SIZE) throw new IllegalArgumentException("Codeword length must not exceed 64");
			for(int i = size - 1; i-- != 0;) {
				if (codeWordLength[i] > Long.SIZE) throw new IllegalArgumentException("Codeword length must not exceed 64");
				if (codeWordLength[i] > codeWordLength[i + 1]) throw new IllegalArgumentException("Codeword lengths must be nondecreasing");
				if (codeWordLength[i] != codeWordLength[i + 1]) howManyLengths++;
			}
			// Lengths are now known to be nondecreasing, so the first one is the smallest: a negative
			// length would turn into a negative number of bits to read at decoding time.
			if (codeWordLength[0] < 0) throw new IllegalArgumentException("Codeword lengths must be nonnegative");
		}

		lengthIncrement = new int[howManyLengths];
		howManyUpToBlock = new int[howManyLengths];
		lastCodeWordPlusOne = new long[howManyLengths];

		// p is the index of the current block (-1 until a nonzero length is met), prevL the length of the
		// current block and word the value of the next codeword to be assigned.
		int p = -1, l, prevL = 0;
		long word = 0;

		for(int i = 0; i < size; i++) {
			l = codeWordLength[i];
			if (l != prevL) {
				// A new block starts: we close the previous one, if any.
				if (i != 0) {
					lastCodeWordPlusOne[p] = word;
					howManyUpToBlock[p] = i;
				}
				lengthIncrement[++p] = l - prevL;
				// The next codeword is the previous one plus one, padded with zeroes up to the new length.
				word <<= l - prevL;
				prevL = l;
			}

			word++;
		}

		if (p != -1) {
			// We close the last block.
			howManyUpToBlock[p] = size;
			lastCodeWordPlusOne[howManyLengths - 1] = word;
		}
		else {
			// No nonzero length was found (the code is empty, or it contains only zero-length codewords):
			// we set up a single block of length zero, so that decoding reads no bits and returns the first symbol.
			howManyUpToBlock[0] = 1;
			lastCodeWordPlusOne[0] = 1;
		}
	}

	/** Reads a specified number of bits from a Boolean iterator and stores them into a long.
	 *
	 * @param iterator a Boolean iterator.
	 * @param length the number of bits to read.
	 * @return the bits read, stored into a long: the first read bit will be bit length − 1.
	 */
	private static long readLong(final BooleanIterator iterator, final int length) {
		long x = 0;
		for(int i = length; i-- != 0;) if (iterator.nextBoolean()) x |= 1L << i;
		return x;
	}

	/** Decodes the next symbol by reading bits from a Boolean iterator.
	 *
	 * <p>Note that if the bits read do not fall within any block (which can happen only if the
	 * code is not complete) the scan will run past the last block, causing
	 * an {@link ArrayIndexOutOfBoundsException}.
	 *
	 * @param iterator a Boolean iterator returning the bits of a codeword, most significant bit first.
	 * @return the symbol associated with the codeword read.
	 */
	@Override
	public int decode(final BooleanIterator iterator) {
		// Local copies of the instance arrays used in the loop.
		final int[] lengthIncrement = this.lengthIncrement;
		final long[] lastCodeWordPlusOne = this.lastCodeWordPlusOne;
		int curr = 0, l;
		long x;

		x = readLong(iterator, lengthIncrement[curr]);

		for(;;) {
			// The codeword belongs to the current block: its offset from the end of the block gives its rank.
			if (x < lastCodeWordPlusOne[curr]) return symbol[(int)(howManyUpToBlock[curr] - lastCodeWordPlusOne[curr] + x)];
			// Otherwise, we extend the bits read so far to the length of the next block.
			l = lengthIncrement[++curr];
			x = x << l | readLong(iterator, l);
		}
	}

	/** Decodes the next symbol by reading bits from an input bit stream.
	 *
	 * <p>Note that if the bits read do not fall within any block (which can happen only if the
	 * code is not complete) the scan will run past the last block, causing
	 * an {@link ArrayIndexOutOfBoundsException}.
	 *
	 * @param ibs an input bit stream positioned at the start of a codeword.
	 * @return the symbol associated with the codeword read.
	 * @throws IOException if the underlying bit stream throws it while reading.
	 */
	@Override
	public int decode(final InputBitStream ibs) throws IOException {
		// Local copies of the instance arrays used in the loop.
		final int[] lengthIncrement = this.lengthIncrement;
		final long[] lastCodeWordPlusOne = this.lastCodeWordPlusOne;
		int curr = 0, l;
		long x;

		x = ibs.readLong(lengthIncrement[curr]);

		for(;;) {
			// The codeword belongs to the current block: its offset from the end of the block gives its rank.
			if (x < lastCodeWordPlusOne[curr]) return symbol[(int)(howManyUpToBlock[curr] - lastCodeWordPlusOne[curr] + x)];
			// Otherwise, we extend the bits read so far to the length of the next block.
			l = lengthIncrement[++curr];
			// Single-bit increments use readBit() instead of readLong(1) (presumably a cheaper call).
			if (l == 1) x = x << 1 | ibs.readBit();
			else x = x << l | ibs.readLong(l);
		}
	}
}