All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.aliasi.io.BitOutput Maven / Gradle / Ivy

Go to download

This is the original Lingpipe: http://alias-i.com/lingpipe/web/download.html No changes were made to the source code.

There is a newer version: 4.1.2-JL1.0
Show newest version
package com.aliasi.io;

import com.aliasi.util.Math;

import java.io.OutputStream;
import java.io.IOException;

/** 
 * A BitOutput wraps an underlying output stream to
 * provide bit-level output.  Output is written through the method
 * {@link #writeBit(boolean)}, with true used for the bit
 * 1 and false for the bit 0.
 * The methods {@link #writeTrue()} and {@link #writeFalse()} are
 * shorthand for writeBit(true) and
 * writeBit(false) respectively.
 *
 * 

If the number of bits written before closing the output does not * land on a byte boundary, the remaining fractional byte is filled * with 0 bits. * *

None of the methods in this class are safe for concurrent access * by multiple threads. * * @author Bob Carpenter * @version 2.1.1 * @since LingPipe2.1.1 */ public class BitOutput { private int mNextByte; private int mNextBitIndex; private final OutputStream mOut; /** * Construct a bit output wrapping the specified output stream. * * @param out Underlying output stream. */ public BitOutput(OutputStream out) { mOut = out; reset(); } /** * Writes the bits for a unary code for the specified positive * number. The unary code for the number n is * defined by: * *

* unaryCode(n) = 0n-1 1 *
* * In words, the number n is coded as * n-1 zeros followed by a one. The following * table illustrates the first few unary codes: * *
* * * * * * *
NumberCode
11
201
3001
40001
500001
* * @param n Number to code. * @throws IOException If there is an I/O error writing * to the underlying output stream. * @throws IllegalArgumentException If the number to be encoded is * zero or negative. */ public void writeUnary(int n) throws IOException { validatePositive(n); // fit in buffer int numZeros = n - 1; if (numZeros <= mNextBitIndex) { mNextByte = mNextByte << numZeros; mNextBitIndex -= numZeros; writeTrue(); return; } // fill buffer, write and flush // numZeros > mNextBitIndex mOut.write(mNextByte << mNextBitIndex); numZeros -= (mNextBitIndex+1); reset(); // fill in even multiples of eight for (; numZeros >= 8; numZeros -= 8) mOut.write(ZERO_BYTE); // fill in last zeros mNextBitIndex -= numZeros; writeTrue(); } /** * Writes the bits of a binary representation of the specified * non-negative number in the specified number of bits. if the * number will not fit in the number of bits specified, an * exception is raised. * *

For instance, the following illustrates one, two and * three-bit codings. * *

* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
NumberBinaryCode for Num Bits
1 2 3
01000000
11101001
210Exception10010
310Exception11011
4100ExceptionException100
5101ExceptionException101
6110ExceptionException110
7111ExceptionException111
81000ExceptionExceptionException
* * @param n Number to code. * @param numBits Number of bits to use for coding. * @throws IllegalArgumentException If the number to code is * negative, the number of bits is greater than 63, or the number * will not fit into the specified number of bits. * @throws IOException If there is an error writing to the * underlying output stream. */ public void writeBinary(long n, int numBits) throws IOException { validateNonNegative(n); validateNumBits(numBits); int k = mostSignificantPowerOfTwo(n); if (k >= numBits) { String msg = "Number will not fit into number of bits." + " n=" + n + " numBits=" + numBits; throw new IllegalArgumentException(msg); } writeLowOrderBits(numBits,n); } /** * Writes the bits for Rice code for the specified non-negative * number with the specified number of bits fixed for the binary * remainder. Rice coding is a form of Golomb coding where the * Golomb paramemter is a power of two (2 to the number of bits in * the remainder). The Rice code is defined by unary coding a * magnitude and then binary coding the remainder. It can be * defined by taking a quotient and remainder: * *
* * * * * * * *
m = 2b= (1<<b)
q = (n - 1) / m= (n - 1) >>> b
r = n - q*m - 1= n - (q << b) - 1
*
* * both of which are defined by shifting, and then coding each * in turn using a unary code for the quotient and binary code for * the remainder: * *
* riceCode(n,b) = unaryCode(q) binaryCode(r) *
* * For example, we get the following codes with the number of * fixed remainder bits set to 1, 2 and 3, with the unary coded * quotient separated from the binary coded remainder by a space: * *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
Number
n
BinaryCode for Number of Remainder Bits
b=1 b=2 b=3
1 11 0 1 00 1 000
2 101 1 1 01 1 001
3 1101 0 1 10 1 010
4 10001 1 1 11 1 011
5 101001 0 01 00 1 100
6 110001 1 01 01 1 101
7 1110001 0 01 10 1 110
8 10000001 1 01 11 1 111
9 100100001 0 001 00 01 000
10 101000001 1 001 01 01 001
11 1011000001 0 001 10 01 010
12 1100000001 1 001 11 01 011
13 11010000001 0 0001 00 01 100
14 11100000001 1 0001 01 01 101
15 111100000001 0 0001 10 01 110
16 1000000000001 1 0001 11 01 111
17 10001000000001 0 00001 00 001 000
* * In the limit, if the number of remaining bits to code is set to * zero, the Rice code would reduce to a unary code: * *
* riceCode(n,0) = unaryCode(n) *
* * but this method will throw an exception with a remainder size * of zero. * *

In the limit the other way, if the number of remaining bits * is set to the width of the maximum value, the Rice code is just * the unary coding of 1, which is the single binary digit 1, * followed by the binary code itself: * *

* riceCode(n,64) = unaryCode(1) binaryCode(n,64) = 1 binaryCode(n,64) *
* *

The method will throw an exception if the encoding * produces a unary code that would output more bits * than would fit in a positive integer (that is, more * than (232-1) bits. * * For more information, see: * *

    * *
  • Golomb, S. 1966. Run-length encodings. IEEE * Trans. Inform. Theory. 12(3):399-401. * *
  • Rice, R. F. 1979. Some practical universal noiseless * coding techniques. JPL Publication 79-22. March 1979. * *
  • Witten, Ian H., Alistair Moffat, and Timothy C. Bell. * 1999. Managing Gigabytes. Academic Press. * *
  • Wikipedia: Golomb coding * *
* * @param n Number to code. * @param numFixedBits Number of bits to use for the fixed * remainder encoding. * @throws IOException If there is an error writing to the * underlying output stream. * @throws IllegalArgumentException If the number to be encoded is * not positive, if the number of fixed bits is not positive, or if * the unary prefix code overflows. */ public void writeRice(long n, int numFixedBits) throws IOException { validatePositive(n); validateNumBits(numFixedBits); long q = (n - 1l) >> numFixedBits; long prefixBits = q + 1l; if (prefixBits >= Integer.MAX_VALUE) { String msg = "Prefix too long to code." + " n=" + n + " numFixedBits=" + numFixedBits + " number of prefix bits=(n>>numFixBits)=" + prefixBits; throw new IllegalArgumentException(msg); } writeUnary((int) prefixBits); long remainder = n - (q << numFixedBits) - 1; writeLowOrderBits(numFixedBits,remainder); } /** * Writes the Fibonacci code for the specified positive number. * Roughly speaking, the Fibonacci code specifies a number * as a sum of non-consecutive Fibonacci numbers, terminating * a representation with two consecutive 1 bits. * *

Fibonacci * numbers are defined by setting * *

     * Fib(0) = 0
     * Fib(1) = 1
     * Fib(n+2) = Fib(n+1) + Fib(n)
     * 
* * The first few Fibonacci numbers are: * *
* 0, 1, 1, 2, 3, 5, 8, 13, 21, ... *
* * This method starts with the second 1 value, * namely Fib(2), making the sequence a sequence * of unique numbers starting with 1, 2, 3, 5,.... * *

The Fibonacci representation of a number is a bit vector * indicating the Fibonacci numbers used in the sum. The * Fibonacci code reverses the Fibonacci representation and * appends a 1 bit. Here are examples for the first 17 numbers: * *

* * * * * * * * * * * * * * * * * * * *
Number Fibonacci RepresentationFibonacci Code
1 1 11
2 10 01 1
3 100 001 1
4 101 101 1
5 1000 0001 1
6 1001 1001 1
7 1010 0101 1
8 10000 00001 1
9 10001 10001 1
10 10010 01001 1
11 10100 00101 1
12 10101 10101 1
13 100000 000001 1
14 100001 100001 1
15 100010 010001 1
16 100100 001001 1
17 100101 101001 1
* * For example, the number 11 is coded as the sum of the * non-consecutive Fibonacci numbers 8 + 3, so the Fibonacci * representation is 10100 (8 is the fifth number in * the series above, 3 is the third). Its Fibonacci code reverses * the number to 00101 and appends a 1 * to yield 001011. * *

Fibonacci codes can represent arbitrary positive numbers up * to Long.MAX_VALUE. * *

See {@link Math#FIBONACCI_SEQUENCE} for a definition of * the Fibonacci sequence as an array of longs. * *

In the limit (for larger numbers), the number of bits * used by a Fibonacci coding is roughly 60 percent higher * than the number of bits used for a binary code. The benefit * is that Fibonacci codes are prefix codes, whereas binary codes * are not. * * @param n Number to encode. * @throws IllegalArgumentException If the number is not positive. * @throws IOException If there is an I/O exception writing to the * underlying stream. */ public void writeFibonacci(long n) throws IOException { validatePositive(n); long[] fibs = Math.FIBONACCI_SEQUENCE; boolean[] buf = FIB_BUF; int mostSigPlace = mostSigFibonacci(fibs,n); for (int place = mostSigPlace; place >= 0; --place) { if (n >= fibs[place]) { n -= fibs[place]; buf[place] = true; } else { buf[place] = false; } } for (int i = 0; i <= mostSigPlace; ++i) writeBit(buf[i]); writeTrue(); } /** * Writes the bits for the Elias gamma code for the specified * positive number. The gamma code of the number n * is based on its binary representation b[k-1],...,b[0]: * *

* gammaCode(b[k-1],...,b[0]) = unaryCode(k),b[k-1],...,b[0] *
* * In words, the position of the most significant binary digit is * coded using a unary code, with the remaining digits making up * the rest of the gamma code. * *

The Following table provides an illustration of the gamma * coding of the first 17 positive integers. Each row displays * the number being coded, its binary representation, and its * gamma code. The gamma code is displayed as its unary coding of * the number of digits in the binary representation followed by a * space and then by the digits of the binary representation after * the first one. * *

* * * * * * * * * * * * * * * * * * * * *
NumberBinaryGamma code
111
21001 0
31101 1
4100001 00
5101001 01
6110001 10
7111001 11
810000001 000
910010001 001
1010100001 010
1110110001 011
1211000001 100
1311010001 101
1411100001 110
1511110001 111
161000000001 0000
171000100001 0001
* * For more information on gamma coding, see: * * * * @param n Number to code. * @throws IOException If there is an I/O error writing to the * underlying stream. * @throws IllegalArgumentException If the number to be encoded is * zero or negative. */ public void writeGamma(long n) throws IOException { validatePositive(n); if (n == 1l) { writeTrue(); return; } int k = mostSignificantPowerOfTwo(n); writeUnary(k+1); writeLowOrderBits(k,n); } /** * Writes the bits for the Elias delta code for the specified * positive number. The delta code of the number n * is based on its binary representation * b[k-1],...,b[0]: * *
* deltaCode(b[k-1],...,b[0]) = gammaCode(k),b[k-1],...,b[0] *
* * In words, the position of the most significant binary digit is * coded using a gamma code, with the remaining digits making up * the rest of the gamma code. * *

The following table illustrates the delta codes for some * small numbers. Each row lists the number, its binary * representation, and its delta code. The delta code is * written as the initial gamma code of its most significant digit's * position and the remaining bits in the binary representation. * Note that the delta codes are longer for small numbers, * but shorter for large numbers. * *

* * * * * * * * * * * * * * * * * * * * *
NumberBinaryDelta code
111
210010 0
311010 1
4100011 00
5101011 01
6110011 10
7111011 11
8100000100 000
9100100100 001
10101000100 010
11101100100 011
12110000100 100
13110100100 101
14111000100 110
15111100100 111
161000000101 0000
171000100101 0001
* * For more information on delta coding, see: * * * * @param n Number to code. * @throws IOException If there is an I/O error writing to the * underlying stream. * @throws IllegalArgumentException If the number to be encoded is * zero or negative. */ public void writeDelta(long n) throws IOException { validatePositive(n); int numBits = mostSignificantPowerOfTwo(n); // 1 to 63 if (numBits > 63) { throw new IOException("numBits too large=" + numBits); } writeGamma(numBits+1); if (numBits > 0) writeLowOrderBits(numBits,n); } /** * Closes underlying output stream and releases any resources * associated with the stream. This method first flushes the * output stream, which sets any remaining bits in the byte * currently being written to 0. * *

The close method calls the {@link OutputStream#close()} * method on the contained output stream. * * @throws IOException If there is an I/O exception writing the * next byte or closing the underlying output stream. */ public void close() throws IOException { flush(); mOut.close(); } /** * Flushes writes to the underlying output stream. First, this * method sets any bits remaining in the current byte to * 0. It then calls {@link OutputStream#flush()} on * the underlying output stream. * @throws IOException If there is an exception writing to or * flushing the underlying output stream. */ public void flush() throws IOException { if (mNextBitIndex < 7) { mOut.write(mNextByte << mNextBitIndex); // shift to fill reset(); } mOut.flush(); } /** * Writes the specified bit. The boolean true is * used for the bit 1 and false for * 0. * * @param bit Value to write. * @throws IOException If there is an exception writing to the * underlying output stream. */ public void writeBit(boolean bit) throws IOException { if (bit) writeTrue(); else writeFalse(); } /** * Writes a single true (1) bit. * * @throws IOException If there is an exception writing to the * underlying output stream. */ public void writeTrue() throws IOException { if (mNextBitIndex == 0) { mOut.write(mNextByte | 1); reset(); } else { mNextByte = (mNextByte | 1) << 1; --mNextBitIndex; } } /** * Writes a single false (0) bit. * * @throws IOException If there is an exception writing to the * underlying output stream. 
*/ public void writeFalse() throws IOException { if (mNextBitIndex == 0) { mOut.write(mNextByte); reset(); } else { mNextByte <<= 1; --mNextBitIndex; } } // writes out k lowest bits private void writeLowOrderBits(int numBits, long n) throws IOException { /* simple version that works: while (--numBits >= 0) writeBit(((ONE << numBits) & n) != 0); */ // if fits without output, pack and return if (mNextBitIndex >= numBits) { mNextByte = ( (mNextByte << (numBits-1)) | (int) leastSignificantBits2(n,numBits)) << 1; mNextBitIndex -= numBits; return; } // pack rest of bit buffer and output numBits -= (mNextBitIndex + 1); mOut.write((mNextByte << mNextBitIndex) | (int) sliceBits2(n,numBits,mNextBitIndex+1)); // write even numbers of bytes where available while (numBits >= 8) { numBits -= 8; mOut.write((int) sliceBits2(n,numBits,8)); } // write remainder if (numBits == 0) { reset(); return; } mNextByte = ((int) leastSignificantBits2(n,numBits)) << 1; mNextBitIndex = 7 - numBits; } private void reset() { mNextByte = 0; mNextBitIndex = 7; } private static final long ALL_ONES_LONG = ~0l; // not thread safe anyway, so might as well spend 800 bytes for class private static final boolean[] FIB_BUF = new boolean[Math.FIBONACCI_SEQUENCE.length+1]; private static final byte ZERO_BYTE = (byte) 0; /** * Returns the specified number of the least significant bits of * the specified long value as a long. For example, * leastSignificantBits(13,2) = 3, because 13 is * 1011 in binary and the two least significant * digits are 11. * * @param n Value whose least significant bits are returned. * @param numBits The number of bits to return. * @return The least significant number of bits. * @throws IllegalArgumentException If the number of bits is less than * 1 or greater than 64. */ public static long leastSignificantBits(long n, int numBits) { if (numBits < 1 || numBits > 64) { String msg = "Number of bits must be between 1 and 64 inclusive." 
+ " Found numBits=" + numBits; throw new IllegalArgumentException(msg); } return leastSignificantBits2(n,numBits); } /** * Returns a slice of bits in the specified long value running * from the specified least significant bit for the specified * number of bits. The bits are indexed in increasing order of * significance from 0 to 63. So for the binary 110, * the bit indexed 0 is 0, the bit indexed 1 is 1 and the bit * indexed 2 is 1. For example, sliceBits(57,2,3) = * 6, because 57 is 111001 in binary and the * three bits extending to the left from position 2 are * 110, which is 2. * * @param n Value to be sliced. * @param leastSignificantBit Index of least significant bit in * the result. * @param numBits Number of bits including least significant bit * to return. * @throws IllegalArgumentException If the number of bits is less * than zero or greater than 64, or if the least significant bit * index is less than 0 or greater than 63. */ public static long sliceBits(long n, int leastSignificantBit, int numBits) { if (leastSignificantBit < 0 || leastSignificantBit > 63) { String msg = "Least significant bit must be between 0 and 63." + " Found leastSignificantBit=" + leastSignificantBit; throw new IllegalArgumentException(msg); } if (numBits < 1 || numBits > 64) { String msg = "Number of bits must be between 1 and 64 inclusive." + " Found numBits=" + numBits; throw new IllegalArgumentException(msg); } return sliceBits2(n,leastSignificantBit,numBits); } static long leastSignificantBits2(long n, int numBits) { return (ALL_ONES_LONG >>> (64-numBits)) & n; } static long sliceBits2(long n, int leastSignificantBit, int numBits) { return leastSignificantBits2(n >>> leastSignificantBit, numBits); } /** * Returns the index of the most significant bit filled for the * specified long value. For example, * *

     * mostSignificantPowerOfTwo(1) = 0
     * mostSignificantPowerOfTwo(2) = 1
     * mostSignificantPowerOfTwo(4) = 2
     * mostSignificantPowerOfTwo(8) = 3
     * 
* *

This result of this method may be defined in terms of * the built-in method {@link Long#numberOfLeadingZeros(long)}, added * in Java 1.5, by: * *

     * mostSignificantPowerOfTwo(n) = Math.max(0,63-Long.numberOfLeadingZeros(n))
     * 
* * @param n The specified value. * @return The most significant power of 2 of the specified value. */ public static int mostSignificantPowerOfTwo(long n) { int sum = (n >> 32 != 0) ? 32 : 0; if (n >> (sum | 16) != 0) sum = (sum | 16); if (n >> (sum | 8) != 0) sum = (sum | 8); if (n >> (sum | 4) != 0) sum = (sum | 4); if (n >> (sum | 2) != 0) sum = (sum | 2); return (n >> (sum | 1) != 0) ? (sum | 1) : sum; } static int mostSigFibonacci(long[] fibs, long n) { int low = 0; int high = fibs.length-1; while (low <= high) { int mid = (low + high) / 2; if (fibs[mid] < n) low = (low == mid) ? mid+1 : mid; else if (fibs[mid] > n) high = (high == mid) ? mid-1 : mid; else return mid; } return low-1; } static void validateNumBits(int numBits) { if (numBits > 0) return; String msg = "Number of bits must be positive." + " Found numBits=" + numBits; throw new IllegalArgumentException(msg); } static void validatePositive(long n) { if (n > 0) return; String msg = "Require number greater than zero." + " Found n=" + n; throw new IllegalArgumentException(msg); } static void validateNonNegative(long n) { if (n >= 0) return; String msg = "Require non-negative number." + " Found n=" + n; throw new IllegalArgumentException(msg); } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy