// Source listing of com.aliasi.io.BitOutput from the aliasi-lingpipe artifact
// (available via Maven / Gradle / Ivy; see the artifact page for other versions and documentation).
package com.aliasi.io; import com.aliasi.util.Math; import java.io.OutputStream; import java.io.IOException; /** * A
* * both of which are defined by shifting, and then coding each * in turn using a unary code for the quotient and binary code for * the remainder: * *BitOutput
 wraps an underlying output stream to
 * provide bit-level output.  Output is written through the method
 * {@link #writeBit(boolean)}, with <code>true</code> used for the bit
 * <code>1</code> and <code>false</code> for the bit <code>0</code>.
 * The methods {@link #writeTrue()} and {@link #writeFalse()} are
 * shorthand for <code>writeBit(true)</code> and
 * <code>writeBit(false)</code> respectively.
 *
 * <p>If the number of bits written before closing the output does not
 * land on a byte boundary, the remaining fractional byte is filled
 * with <code>0</code>
bits. * *None of the methods in this class are safe for concurrent access * by multiple threads. * * @author Bob Carpenter * @version 2.1.1 * @since LingPipe2.1.1 */
public class BitOutput {

    // Bits collected for the byte currently being assembled.  The value is
    // kept shifted one place to the left so that bit 0 is always free to
    // receive the next bit written (see writeTrue() / writeFalse()).
    private int mNextByte;

    // Number of further bits that can be buffered before the pending byte
    // is complete: 7 when the buffer is empty (see reset()), 0 when the
    // next bit written completes the byte and triggers output.
    private int mNextBitIndex;

    // Destination for completed bytes.
    private final OutputStream mOut;

    /**
     * Construct a bit output wrapping the specified output stream.
     * The bit buffer starts out empty.
     *
     * @param out Underlying output stream.
     */
    public BitOutput(OutputStream out) {
        mOut = out;
        reset();
    }

/** * Writes the bits for a unary code for the specified positive * number. The unary code for the number
n
is * defined by: * ** * In words, the number* unaryCode(n) = 0n-1 1 *
n
is coded as *n-1
zeros followed by a one. The following * table illustrates the first few unary codes: * ** * @param n Number to code. * @throws IOException If there is an I/O error writing * to the underlying output stream. * @throws IllegalArgumentException If the number to be encoded is * zero or negative. */ public void writeUnary(int n) throws IOException { validatePositive(n); // fit in buffer int numZeros = n - 1; if (numZeros <= mNextBitIndex) { mNextByte = mNextByte << numZeros; mNextBitIndex -= numZeros; writeTrue(); return; } // fill buffer, write and flush // numZeros > mNextBitIndex mOut.write(mNextByte << mNextBitIndex); numZeros -= (mNextBitIndex+1); reset(); // fill in even multiples of eight for (; numZeros >= 8; numZeros -= 8) mOut.write(ZERO_BYTE); // fill in last zeros mNextBitIndex -= numZeros; writeTrue(); } /** * Writes the bits of a binary representation of the specified * non-negative number in the specified number of bits. if the * number will not fit in the number of bits specified, an * exception is raised. * **
* Number Code * 1 1
* 2 01
* 3 001
* 4 0001
* 5 00001
For instance, the following illustrates one, two and * three-bit codings. * *
* * @param n Number to code. * @param numBits Number of bits to use for coding. * @throws IllegalArgumentException If the number to code is * negative, the number of bits is greater than 63, or the number * will not fit into the specified number of bits. * @throws IOException If there is an error writing to the * underlying output stream. */ public void writeBinary(long n, int numBits) throws IOException { validateNonNegative(n); validateNumBits(numBits); int k = mostSignificantPowerOfTwo(n); if (k >= numBits) { String msg = "Number will not fit into number of bits." + " n=" + n + " numBits=" + numBits; throw new IllegalArgumentException(msg); } writeLowOrderBits(numBits,n); } /** * Writes the bits for Rice code for the specified non-negative * number with the specified number of bits fixed for the binary * remainder. Rice coding is a form of Golomb coding where the * Golomb paramemter is a power of two (2 to the number of bits in * the remainder). The Rice code is defined by unary coding a * magnitude and then binary coding the remainder. It can be * defined by taking a quotient and remainder: * **
* Number *Binary *Code for Num Bits * 1 2 3 * 0 1 *0 *00 *000 * 1 1 *1 *01 *001 * 2 10 ** Exception
10 *010 * 3 10 ** Exception
11 *011 * 4 100 ** Exception
* Exception
100 * 5 101 ** Exception
* Exception
101 * 6 110 ** Exception
* Exception
110 * 7 111 ** Exception
* Exception
111 * 8 1000 ** Exception
* Exception
Exception
**
** m = 2b *= (1<<b) * q = (n - 1) / m *= (n - 1) >>> b * r = n - q*m - 1 *= n - (q << b) - 1 * * For example, we get the following codes with the number of * fixed remainder bits set to 1, 2 and 3, with the unary coded * quotient separated from the binary coded remainder by a space: * ** riceCode(n,b) = unaryCode(q) binaryCode(r) *
* * In the limit, if the number of remaining bits to code is set to * zero, the Rice code would reduce to a unary code: * **
* Number *n
Binary *Code for Number of Remainder Bits * b=1 b=2 b=3 * 1 1 *1 0 1 00 1 000 * 2 10 *1 1 1 01 1 001 * 3 11 *01 0 1 10 1 010 * 4 100 *01 1 1 11 1 011 * 5 101 *001 0 01 00 1 100 * 6 110 *001 1 01 01 1 101 * 7 111 *0001 0 01 10 1 110 * 8 1000 *0001 1 01 11 1 111 * 9 1001 *00001 0 001 00 01 000 * 10 1010 *00001 1 001 01 01 001 * 11 1011 *000001 0 001 10 01 010 * 12 1100 *000001 1 001 11 01 011 * 13 1101 *0000001 0 0001 00 01 100 * 14 1110 *0000001 1 0001 01 01 101 * 15 1111 *00000001 0 0001 10 01 110 * 16 10000 *00000001 1 0001 11 01 111 * 17 10001 *000000001 0 00001 00 001 000 * * but this method will throw an exception with a remainder size * of zero. * ** riceCode(n,0) = unaryCode(n) *
In the limit the other way, if the number of remaining bits * is set to the width of the maximum value, the Rice code is just * the unary coding of 1, which is the single binary digit 1, * followed by the binary code itself: * *
* ** riceCode(n,64) = unaryCode(1) binaryCode(n,64) = 1 binaryCode(n,64) *
The method will throw an exception if the encoding * produces a unary code that would output more bits * than would fit in a positive integer (that is, more * than (232-1) bits. * * For more information, see: * *
-
*
*
- Golomb, S. 1966. Run-length encodings. IEEE
* Trans. Inform. Theory.
12 (3):399-401. * * - Rice, R. F. 1979. Some practical universal noiseless * coding techniques. JPL Publication 79-22. March 1979. * *
- Witten, Ian H., Alistair Moffat, and Timothy C. Bell. * 1999. Managing Gigabytes. Academic Press. * *
- Wikipedia: Golomb coding * *
Fibonacci * numbers are defined by setting * *
* * The first few Fibonacci numbers are: * ** Fib(0) = 0 * Fib(1) = 1 * Fib(n+2) = Fib(n+1) + Fib(n) *
* 0, 1, 1, 2, 3, 5, 8, 13, 21, ...
*
*
* This method starts with the second 1
value,
* namely Fib(2)
, making the sequence a sequence
* of unique numbers starting with 1, 2, 3, 5,...
.
*
* The Fibonacci representation of a number is a bit vector * indicating the Fibonacci numbers used in the sum. The * Fibonacci code reverses the Fibonacci representation and * appends a 1 bit. Here are examples for the first 17 numbers: * *
* * For example, the number 11 is coded as the sum of the * non-consecutive Fibonacci numbers 8 + 3, so the Fibonacci * representation is*
* Number Fibonacci Representation *Fibonacci Code * 1 1 11 * 2 10 01 1 * 3 100 001 1 * 4 101 101 1 * 5 1000 0001 1 * 6 1001 1001 1 * 7 1010 0101 1 * 8 10000 00001 1 * 9 10001 10001 1 * 10 10010 01001 1 * 11 10100 00101 1 * 12 10101 10101 1 * 13 100000 000001 1 * 14 100001 100001 1 * 15 100010 010001 1 * 16 100100 001001 1 * 17 100101 101001 1
10100
(8 is the fifth number in
* the series above, 3 is the third). Its Fibonacci code reverses
* the number to 00101
and appends a 1
* to yield 001011
.
*
* Fibonacci codes can represent arbitrary positive numbers up
* to Long.MAX_VALUE
.
*
*
See {@link Math#FIBONACCI_SEQUENCE} for a definition of * the Fibonacci sequence as an array of longs. * *
In the limit (for larger numbers), the number of bits
* used by a Fibonacci coding is roughly 60 percent higher
* than the number of bits used for a binary code. The benefit
* is that Fibonacci codes are prefix codes, whereas binary codes
* are not.
*
* @param n Number to encode.
* @throws IllegalArgumentException If the number is not positive.
* @throws IOException If there is an I/O exception writing to the
* underlying stream.
*/
public void writeFibonacci(long n) throws IOException {
    // Emits the Fibonacci code of n: one bit per Fibonacci place from
    // the lowest place up to the highest place used, followed by a
    // terminating 1 bit.  Throws IllegalArgumentException (via
    // validatePositive) when n is not positive, and IOException when
    // the underlying stream fails.
    validatePositive(n);
    final long[] fibs = Math.FIBONACCI_SEQUENCE;
    final int mostSigPlace = mostSigFibonacci(fibs, n);

    // Per-call scratch array.  A buffer shared through a static field
    // would let two independent BitOutput instances running on
    // different threads overwrite each other's digits.
    final boolean[] used = new boolean[mostSigPlace + 1];

    // Greedy decomposition from the largest place downwards; entries
    // left at their default (false) are places that are not used.
    long remaining = n;
    for (int place = mostSigPlace; place >= 0; --place) {
        if (remaining >= fibs[place]) {
            remaining -= fibs[place];
            used[place] = true;
        }
    }

    // The code is the representation reversed: lowest place first.
    for (int place = 0; place <= mostSigPlace; ++place) {
        writeBit(used[place]);
    }
    // Terminating 1 bit.
    writeTrue();
}
/**
* Writes the bits for the Elias gamma code for the specified
* positive number. The gamma code of the number n
* is based on its binary representation b[k-1],...,b[0]
:
*
*
* gammaCode(b[k-1],...,b[0]) = unaryCode(k),b[k-1],...,b[0]
*
*
* In words, the position of the most significant binary digit is
* coded using a unary code, with the remaining digits making up
* the rest of the gamma code.
*
* The following table provides an illustration of the gamma * coding of the first 17 positive integers. Each row displays * the number being coded, its binary representation, and its * gamma code. The gamma code is displayed as its unary coding of * the number of digits in the binary representation followed by a * space and then by the digits of the binary representation after * the first one. * *
* * For more information on gamma coding, see: * **
* Number *Binary *Gamma code * 1 1 1 * 2 10 01 0 * 3 11 01 1 * 4 100 001 00 * 5 101 001 01 * 6 110 001 10 * 7 111 001 11 * 8 1000 0001 000 * 9 1001 0001 001 * 10 1010 0001 010 * 11 1011 0001 011 * 12 1100 0001 100 * 13 1101 0001 101 * 14 1110 0001 110 * 15 1111 0001 111 * 16 10000 00001 0000 * 17 10001 00001 0001
-
*
- Witten, Ian H., Alistair Moffat, and Timothy C. Bell. * 1999. Managing Gigabytes. Academic Press. *
- Wikipedia: Elias gamma coding *
n
* is based on its binary representation
* b[k-1],...,b[0]
:
*
*
* deltaCode(b[k-1],...,b[0]) = gammaCode(k),b[k-1],...,b[0]
*
*
* In words, the position of the most significant binary digit is
* coded using a gamma code, with the remaining digits making up
* the rest of the delta code.
*
* The following table illustrates the delta codes for some * small numbers. Each row lists the number, its binary * representation, and its delta code. The delta code is * written as the initial gamma code of its most significant digit's * position and the remaining bits in the binary representation. * Note that the delta codes are longer for small numbers, * but shorter for large numbers. * *
* * For more information on delta coding, see: * **
* Number *Binary *Delta code * 1 1 1 * 2 10 010 0 * 3 11 010 1 * 4 100 011 00 * 5 101 011 01 * 6 110 011 10 * 7 111 011 11 * 8 1000 00100 000 * 9 1001 00100 001 * 10 1010 00100 010 * 11 1011 00100 011 * 12 1100 00100 100 * 13 1101 00100 101 * 14 1110 00100 110 * 15 1111 00100 111 * 16 10000 00101 0000 * 17 10001 00101 0001
-
*
- Witten, Ian H., Alistair Moffat, and Timothy C. Bell. * 1999. Managing Gigabytes. Academic Press. *
- Wikipedia: Elias delta coding *
0
.
*
* The close method calls the {@link OutputStream#close()}
* method on the contained output stream.
*
* @throws IOException If there is an I/O exception writing the
* next byte or closing the underlying output stream.
*/
public void close() throws IOException {
    // Flush first so that a partially filled byte is padded and written
    // out.  The finally clause guarantees that the underlying stream is
    // closed even when the flush fails; otherwise a failed write would
    // leave the stream open.  If both calls throw, the exception from
    // closing the stream is the one that propagates.
    try {
        flush();
    } finally {
        mOut.close();
    }
}
/**
 * Pushes all buffered output through to the underlying stream.  When
 * a byte is only partially assembled, its unused low-order positions
 * are filled with <code>0</code> bits and the byte is written; after
 * that, {@link OutputStream#flush()} is invoked on the underlying
 * output stream.  Nothing is written when no bits are pending.
 *
 * @throws IOException If there is an exception writing to or
 * flushing the underlying output stream.
 */
public void flush() throws IOException {
    final boolean bitsPending = mNextBitIndex < 7;
    if (bitsPending) {
        // Shifting moves the collected bits to the top of the byte,
        // leaving zeros in the positions that were never written.
        final int paddedByte = mNextByte << mNextBitIndex;
        mOut.write(paddedByte);
        reset();
    }
    mOut.flush();
}
/**
 * Writes one bit, with <code>true</code> standing for the bit
 * <code>1</code> and <code>false</code> standing for the bit
 * <code>0</code>.
 *
 * @param bit Value to write.
 * @throws IOException If there is an exception writing to the
 * underlying output stream.
 */
public void writeBit(boolean bit) throws IOException {
    if (!bit) {
        writeFalse();
        return;
    }
    writeTrue();
}
/**
 * Writes a single <code>true</code> (<code>1</code>) bit.
 *
 * @throws IOException If there is an exception writing to the
 * underlying output stream.
 */
public void writeTrue() throws IOException {
    // Bit 0 of the buffer is the free slot; set it to record the 1.
    final int withBit = mNextByte | 1;
    if (mNextBitIndex != 0) {
        // Room remains: open a new free slot for the next bit.
        mNextByte = withBit << 1;
        mNextBitIndex -= 1;
        return;
    }
    // That was the eighth bit: emit the byte and start over.
    mOut.write(withBit);
    reset();
}
/**
 * Writes a single <code>false</code> (<code>0</code>) bit.
 *
 * @throws IOException If there is an exception writing to the
 * underlying output stream.
 */
public void writeFalse() throws IOException {
    if (mNextBitIndex != 0) {
        // Room remains: the free slot already holds 0, so just open a
        // new free slot for the next bit.
        mNextByte = mNextByte << 1;
        mNextBitIndex -= 1;
        return;
    }
    // That was the eighth bit: emit the byte and start over.
    mOut.write(mNextByte);
    reset();
}
// Writes the numBits least significant bits of n, highest of those bits
// first, appending them to the pending byte and emitting every byte that
// becomes complete to mOut.
//
// Buffer layout relied on below (established by writeTrue(), writeFalse()
// and reset()): mNextByte holds the bits collected so far shifted left by
// one, so that bit 0 is the free slot, and mNextBitIndex + 1 is the number
// of bits that still fit into the pending byte.
//
// NOTE(review): the callers visible in this file pass numBits >= 1; with
// numBits == 0 the first branch would shift by (numBits-1) == -1 -- confirm
// that no caller passes zero.
//
// Throws IOException if writing to the underlying stream fails.
private void writeLowOrderBits(int numBits, long n) throws IOException {
/* simple version that works:
while (--numBits >= 0)
writeBit(((ONE << numBits) & n) != 0);
*/
// if fits without output, pack and return
// (mNextBitIndex >= numBits leaves at least one slot free afterwards, so
// no byte is completed here; the shift by numBits-1, the OR of the new
// bits, and the final shift by one restore the "bit 0 is free" layout)
if (mNextBitIndex >= numBits) {
mNextByte
= ( (mNextByte << (numBits-1))
| (int) leastSignificantBits2(n,numBits))
<< 1;
mNextBitIndex -= numBits;
return;
}
// pack rest of bit buffer and output
// From here on numBits counts the bits that remain after the pending byte
// has been topped off; it doubles as the index of the lowest bit of the
// slice taken from n.  The pending bits are moved to the top of the byte
// and the highest (mNextBitIndex+1) requested bits of n fill the rest.
// NOTE(review): if numBits is 64 or more at this point, the shift inside
// sliceBits2 wraps modulo 64 -- callers are presumably expected to keep
// the requested width small enough; verify.
numBits -= (mNextBitIndex + 1);
mOut.write((mNextByte << mNextBitIndex)
| (int) sliceBits2(n,numBits,mNextBitIndex+1));
// write even numbers of bytes where available
// (each pass emits the next eight bits of n, working downwards)
while (numBits >= 8) {
numBits -= 8;
mOut.write((int) sliceBits2(n,numBits,8));
}
// write remainder
// Nothing left: the output ended on a byte boundary, so start a new,
// empty pending byte.
if (numBits == 0) {
reset();
return;
}
// Between 1 and 7 bits left: they become the start of a new pending byte,
// shifted left by one to keep bit 0 free.
mNextByte = ((int) leastSignificantBits2(n,numBits)) << 1;
mNextBitIndex = 7 - numBits;
}
// Returns the bit buffer to its empty state: no bits collected and
// seven further bits accepted before the eighth completes a byte.
private void reset() {
    mNextBitIndex = 7;
    mNextByte = 0;
}
// A long with all 64 bits set.
private static final long ALL_ONES_LONG = ~0L;

// Scratch array with one slot per entry of Math.FIBONACCI_SEQUENCE plus
// one spare.  It is static and mutable, so it is shared by every
// instance of this class; the class makes no thread-safety promises.
private static final boolean[] FIB_BUF
    = new boolean[Math.FIBONACCI_SEQUENCE.length + 1];

// A byte consisting of eight 0 bits.
private static final byte ZERO_BYTE = 0;
/**
 * Returns the specified number of the least significant bits of
 * the specified long value as a long.  For example,
 * <code>leastSignificantBits(11,2) = 3</code>, because 11 is
 * <code>1011</code> in binary and the two least significant
 * digits are <code>11</code>.
 *
 * @param n Value whose least significant bits are returned.
 * @param numBits The number of bits to return.
 * @return The least significant number of bits.
 * @throws IllegalArgumentException If the number of bits is less than
 * 1 or greater than 64.
 */
public static long leastSignificantBits(long n, int numBits) {
    if (numBits < 1 || numBits > 64) {
        String msg = "Number of bits must be between 1 and 64 inclusive."
            + " Found numBits=" + numBits;
        throw new IllegalArgumentException(msg);
    }
    // The argument is now known to be in range for the unchecked helper.
    return leastSignificantBits2(n,numBits);
}
/**
 * Returns a slice of bits in the specified long value running
 * from the specified least significant bit for the specified
 * number of bits.  The bits are indexed in increasing order of
 * significance from 0 to 63.  So for the binary <code>110</code>,
 * the bit indexed 0 is 0, the bit indexed 1 is 1 and the bit
 * indexed 2 is 1.  For example, <code>sliceBits(57,2,3) =
 * 6</code>, because 57 is <code>111001</code> in binary and the
 * three bits extending to the left from position 2 are
 * <code>110</code>, which is 6.
 *
 * @param n Value to be sliced.
 * @param leastSignificantBit Index of least significant bit in
 * the result.
 * @param numBits Number of bits including least significant bit
 * to return.
 * @return The selected bits, moved down so that the bit at index
 * <code>leastSignificantBit</code> becomes bit 0 of the result.
 * @throws IllegalArgumentException If the number of bits is less
 * than 1 or greater than 64, or if the least significant bit
 * index is less than 0 or greater than 63.
 */
public static long sliceBits(long n, int leastSignificantBit,
                             int numBits) {
    if (leastSignificantBit < 0 || leastSignificantBit > 63) {
        String msg = "Least significant bit must be between 0 and 63."
            + " Found leastSignificantBit=" + leastSignificantBit;
        throw new IllegalArgumentException(msg);
    }
    if (numBits < 1 || numBits > 64) {
        String msg = "Number of bits must be between 1 and 64 inclusive."
            + " Found numBits=" + numBits;
        throw new IllegalArgumentException(msg);
    }
    // Both arguments are now known to be in range for the unchecked helper.
    return sliceBits2(n,leastSignificantBit,numBits);
}
// Unchecked form of leastSignificantBits(long,int): keeps the numBits
// low-order bits of n and clears the rest.  No argument validation is
// performed here.
static long leastSignificantBits2(long n, int numBits) {
    final long mask = ALL_ONES_LONG >>> (64 - numBits);
    return n & mask;
}
// Unchecked form of sliceBits(long,int,int): drops the bits below
// leastSignificantBit with an unsigned shift and keeps the numBits
// low-order bits of what remains.  No argument validation is performed.
static long sliceBits2(long n, int leastSignificantBit, int numBits) {
    final long shifted = n >>> leastSignificantBit;
    return leastSignificantBits2(shifted, numBits);
}
/**
 * Returns the index of the most significant bit filled for the
 * specified long value.  For example,
 *
 * <pre>
 * mostSignificantPowerOfTwo(1) = 0
 * mostSignificantPowerOfTwo(2) = 1
 * mostSignificantPowerOfTwo(4) = 2
 * mostSignificantPowerOfTwo(8) = 3
 * </pre>
 *
 * <p>The result of this method may be defined in terms of
 * the built-in method {@link Long#numberOfLeadingZeros(long)}, added
 * in Java 1.5, by:
 *
 * <pre>
 * mostSignificantPowerOfTwo(n) = Math.max(0,63-Long.numberOfLeadingZeros(n))
 * </pre>
 *
 * <p>The result is 0 for an argument of zero and 63 for any
 * negative argument.
 *
 * @param n The specified value.
 * @return The most significant power of 2 of the specified value.
 */
public static int mostSignificantPowerOfTwo(long n) {
    // Binary search over bit positions: try to add 32, 16, 8, 4, 2, 1
    // to the running index, keeping each step whenever bits remain at
    // or above the candidate position.
    int sum = 0;
    for (int step = 32; step != 0; step >>= 1) {
        if ((n >> (sum | step)) != 0) {
            sum |= step;
        }
    }
    return sum;
}

// Returns the index of the largest entry of fibs that is less than or
// equal to n, or -1 if every entry is greater than n.  Relies on fibs
// being in increasing order.
static int mostSigFibonacci(long[] fibs, long n) {
    int low = 0;
    int high = fibs.length - 1;
    while (low <= high) {
        // Unsigned shift keeps the midpoint correct even if the sum
        // were to overflow.
        int mid = (low + high) >>> 1;
        if (fibs[mid] < n) {
            low = (low == mid) ? mid + 1 : mid;
        } else if (fibs[mid] > n) {
            high = (high == mid) ? mid - 1 : mid;
        } else {
            return mid;
        }
    }
    return low - 1;
}

// Throws IllegalArgumentException unless numBits is greater than zero.
static void validateNumBits(int numBits) {
    if (numBits <= 0) {
        String msg = "Number of bits must be positive."
            + " Found numBits=" + numBits;
        throw new IllegalArgumentException(msg);
    }
}

// Throws IllegalArgumentException unless n is greater than zero.
static void validatePositive(long n) {
    if (n <= 0) {
        String msg = "Require number greater than zero."
            + " Found n=" + n;
        throw new IllegalArgumentException(msg);
    }
}

// Throws IllegalArgumentException if n is negative.
static void validateNonNegative(long n) {
    if (n < 0) {
        String msg = "Require non-negative number."
            + " Found n=" + n;
        throw new IllegalArgumentException(msg);
    }
}

}