All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.atmosphere.gwt.server.deflate.Deflater Maven / Gradle / Ivy

There is a newer version: 1.1.0.RC5
Show newest version
/*
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 *
 * Copyright 2007-2008 Sun Microsystems, Inc. All rights reserved.
 *
 * The contents of this file are subject to the terms of either the GNU
 * General Public License Version 2 only ("GPL") or the Common Development
 * and Distribution License("CDDL") (collectively, the "License").  You
 * may not use this file except in compliance with the License. You can obtain
 * a copy of the License at https://glassfish.dev.java.net/public/CDDL+GPL.html
 * or glassfish/bootstrap/legal/LICENSE.txt.  See the License for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing the software, include this License Header Notice in each
 * file and include the License file at glassfish/bootstrap/legal/LICENSE.txt.
 * Sun designates this particular file as subject to the "Classpath" exception
 * as provided by Sun in the GPL Version 2 section of the License file that
 * accompanied this code.  If applicable, add the following below the License
 * Header, with the fields enclosed by brackets [] replaced by your own
 * identifying information: "Portions Copyrighted [year]
 * [name of copyright owner]"
 *
 * Contributor(s):
 *
 * If you wish your version of this file to be governed by only the CDDL or
 * only the GPL Version 2, indicate your decision by adding "[Contributor]
 * elects to include this software in this distribution under the [CDDL or GPL
 * Version 2] license."  If you don't indicate a single choice of license, a
 * recipient has the option to distribute your version of this file under
 * either the CDDL, the GPL Version 2 or to extend the choice of license to
 * its licensees as provided above.  However, if you add GPL Version 2 code
 * and therefore, elected the GPL Version 2 license, then the option applies
 * only if the new code is made subject to such option by the copyright
 * holder.
 *
 */
// $Id: Deflater.java 122 2007-08-18 08:25:04Z pornin $
/*
 * Copyright (c) 2007  Thomas Pornin
 * 
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 * 
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 * 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

package org.atmosphere.gwt.server.deflate;

import java.io.IOException;
import java.io.OutputStream;

/**
 * This class implements the core DEFLATE process, i.e. the compression of data into DEFLATE blocks.
 * 
 * @version $Revision: 122 $
 * @author Thomas Pornin
 */

public final class Deflater {
	
	/*
	 * In this class, all state variables and arrays have been designed such that the default values (all-zero) are
	 * fine.
	 */

	/* =========================================================== */
	/*
	 * Input data management.
	 * 
	 * The window keeps a copy of the previously encountered uncompressed data bytes. In the beginning, the window is
	 * empty, then filled up to a certain limit (a power of 2, at most 32768). Beyond that limit, it is managed as a
	 * circular buffer.
	 * 
	 * To each data byte we associate a triplet, consisting of that byte and the next two bytes. We assemble triplets
	 * into 24-bit values, such that the lower 8 bits are the most recently received. Each window slot contains a
	 * triplet; hence, each input data byte appears in three distinct, successive slots in the window.
	 * 
	 * Window triplets are linked as hash chains, with the hashLink[] array. The hashTable[] array contains the chain
	 * entry points.
	 * 
	 * We also copy input bytes into the ucBuffer[] array (up to some limit). These are used to produce uncompressed
	 * data blocks if it turns out that the compressor cannot find anything better. The size limit is computed so that
	 * when an uncompressed data block must be produced, then the size limit on ucBuffer[] has not been reached. Note:
	 * technically, we could limit ucBuffer[] to the window length and complete the uncompressed block by reconstructing
	 * the input bytes from the symbols in buffer[]. This would be interesting if we used a large buffer[] array. But
	 * such a large symbol buffer is undesirable since it prevents the dynamic Huffman codes from adapting to changes in
	 * the file content type.
	 */

	/**
	 * The window buffer. Entry of index "n" contains the triplet of bytes beginning at index "n".
	 */
	private int[] window;
	
	/**
	 * windowPtr contains the window index where the next triplet will go.
	 */
	private int windowPtr;
	
	/**
	 * This state variable is initially 0; it is set to 1 after the first data byte has been received, and 2 once the
	 * second data byte has been received.
	 */
	private int windowState;
	
	/**
	 * recentBytes is a 16-bit value which contains the last two received bytes.
	 */
	private int recentBytes;
	
	/**
	 * Hash chains: element "n" in this array contains the backward distance to the previous triplet with the same hash
	 * (or 0, if there is none). Due to the window buffer reuse, the previous triplet may have been overwritten: the
	 * code which walks a chain must make sure that it stays within the window size.
	 */
	private char[] windowLink;
	
	/**
	 * For the hash value "v", hashTable[v] contains the value "j". If j == 0, then there is
	 * no current chain for this hash value; otherwise, the chain head is in the window, at index "j-1". It may happen
	 * that the chain head entry has been overwritten: the insertion code checks the designated entry when linking a new
	 * triplet.
	 */
	private char[] hashTable;
	
	/**
	 * The buffer which receives a copy of the uncompressed data. That buffer is circular.
	 */
	private byte[] ucBuffer;
	
	/**
	 * The length of the accumulated data in ucBuffer.
	 */
	private int ucBufferPtr;
	
	/**
	 * The current to-match sequence length. If its value is 0, 1 or 2, then we do not have a triplet yet and there is
	 * no match. Otherwise, with a value of 3 or more, then there is a match with a previous sequence which begins at
	 * index seqPtr and distance seqDist.
	 */
	private int seqLen;
	
	/**
	 * The index at which the current match begins (ignored if seqLen is 2 or less).
	 */
	private int seqPtr;
	
	/**
	 * The distance between the current matched sequence and its source. Ignored if seqLen is 2 or less.
	 */
	private int seqDist;
	
	/* =========================================================== */

	/*
	 * Compression parameters.
	 * 
	 * These parameters alter both the compression ratio and the compression speed.
	 */

	/**
	 * The maximum hash chain explored length when looking for a triplet.
	 */
	private int maxChainLengthTriplet;
	
	/**
	 * The maximum backward distance when looking for a triplet.
	 */
	private int maxDistanceTriplet;
	
	/**
	 * The maximum hash chain explored length when looking for a previous longer sequence.
	 */
	private int maxChainLengthSeq1;
	
	/**
	 * The maximum backward distance when looking for a previous longer sequence.
	 */
	private int maxDistanceSeq1;
	
	/**
	 * If the current sequence exceeds this length, then the explored chain length when looking for a previous longer
	 * distance is reduced (divided by four).
	 */
	private int goodSequenceLength;
	
	/**
	 * If the current sequence exceeds this length, then a lazy match is not attempted.
	 */
	private int maxLazyLength;
	
	/**
	 * The maximum hash chain explored length when looking for a sequence in lazy match mode.
	 */
	private int maxChainLengthSeq2;
	
	/**
	 * The maximum backward distance when looking for a sequence in lazy match mode.
	 */
	private int maxDistanceSeq2;
	
	/**
	 * If true, then no LZ77 sharing will be performed. Compression will come only from the Huffman codes.
	 */
	private boolean huffOnly;
	
	/* =========================================================== */
	/*
	 * Output data management.
	 * 
	 * Output symbols are accumulated in a buffer. Each entry is a 32-bit word consisting of: -- literal value in the
	 * literal+length alphabet (9 bits) -- extra length bits (5 bits) -- distance symbol (5 bits) -- extra distance
	 * length (13 bits) (values ordered from least- to most-significant bits)
	 * 
	 * If the literal value is not a sequence copy symbol (i.e. it has value 255 or less) then the other fields are
	 * zero. Note that the end-of-block special literal (value 256) is never written in that buffer.
	 * 
	 * The lengths of the buffer[] and ucBuffer[] arrays are linked. Basically, if there are bufferLen symbols in
	 * buffer[], then these could be encoded as (at worst) 32*bufferLen bits, in a type 1 block (plus the three-bit
	 * block header). Hence, a type 0 block may be used only if the uncompressed data length is no more than
	 * 32*bufferLen-32 bits, because this is the payload length for an uncompressed block of size 32*bufferLen bits
	 * (then again excluding the three-bit header, and the additional byte-alignment padding bits). Therefore,
	 * ucBuffer.length needs not be larger than 4*buffer.length (we need 258 more extra bytes to accommodate a
	 * currently matched sequence).
	 * 
	 * We furthermore limit buffer.length to 16384, thus guaranteeing that when type 0 blocks are selected, no more than
	 * 65532 bytes are to be written out, which fits in a single block. The drawback is that uncompressible data may
	 * fill buffer[] after only 16384 literal bytes, which yields an overhead four times larger than the optimal
	 * overhead. This is not a serious issue: it turns out that zlib has the same overhead and is quite happy with it.
	 * 
	 * Actual writes to the output stream use an internal buffer so that unbuffered transport streams can be used.
	 * Compression is inherently a buffered process.
	 */

	/**
	 * The output buffer for symbols.
	 */
	private int[] buffer;
	
	/**
	 * The number of symbols currently accumulated in the buffer.
	 */
	private int bufferPtr;
	
	/**
	 * The underlying transport stream for compressed data.
	 */
	private OutputStream out;
	
	/**
	 * This byte value accumulates up to 7 bits, to be sent as part of the next complete byte.
	 */
	private int outByte;
	
	/**
	 * The number of accumulated bits in outByte (between 0 and 7, inclusive).
	 */
	private int outPtr;
	
	/**
	 * This buffer is used internally to accumulate compressed data bytes before sending them to the transport stream.
	 */
	private byte[] outBuf;
	
	/**
	 * This pointer value marks the current limit to the data stored in outBuf[].
	 */
	private int outBufPtr;
	
	/**
	 * This flag is set to true when processing a dictionary.
	 */
	private boolean noOutput;
	
	/* =========================================================== */

	/**
	 * Compression level 1: do not apply LZ77; only Huffman codes are computed. This is faster than SPEED
	 * but with a degraded compression ratio.
	 */
	public static final int HUFF = 1;
	
	/**
	 * Compression level 2: optimize for speed.
	 */
	public static final int SPEED = 2;
	
	/**
	 * Compression level 3: compromise between speed and compression ratio. This is the default level.
	 */
	public static final int MEDIUM = 3;
	
	/**
	 * Compression level 4: try to achieve best compression ratio, possibly at the expense of compression speed. This
	 * level may occasionally yield slightly worse results than the MEDIUM level.
	 */
	public static final int COMPACT = 4;
	
	/**
	 * Build a deflater with the default parameters (MEDIUM level, 15-bit window).
	 */
	public Deflater() {
		// Level 0 is mapped to the default level (MEDIUM) by the main constructor.
		this(0, 15);
	}
	
	/**
	 * Build a deflater with the provided compression strategy (HUFF, SPEED,
	 * MEDIUM or COMPACT). A standard 15-bit window is used. If the compression
	 * level is 0, then the default compression level (MEDIUM) is selected.
	 * 
	 * @param level
	 *            the compression strategy
	 */
	public Deflater(int level) {
		this(level, 15);
	}
	
	/**
	 * Build a deflater with the provided compression strategy (HUFF, SPEED,
	 * MEDIUM or COMPACT) and the provided window size. The window size is
	 * expressed in bits and may range from 9 to 15 (inclusive). The DEFLATE format uses a 15-bit window; a smaller
	 * window allows the produced stream to be inflated with less memory, at the cost of a lower compression ratio.
	 * If the compression level is 0, then the default compression level (MEDIUM) is selected.
	 * 
	 * @param level
	 *            the compression strategy
	 * @param windowBits
	 *            the window bit size (9 to 15)
	 */
	public Deflater(int level, int windowBits) {
		if (windowBits < 9 || windowBits > 15)
			throw new IllegalArgumentException("invalid LZ77 window bit length: " + windowBits);
		
		/*
		 * The symbol buffer should not be too large, because that prevents the Huffman codes from adapting to
		 * changes inside the data. When it is too small, the block headers and dynamic tree representations become
		 * too expensive. The maximum value is 16384; to use something greater, the type 0 block code in endBlock()
		 * would have to be adapted.
		 */
		int symbolCount = 16384;
		
		int windowLen = 1 << windowBits;
		window = new int[windowLen];
		windowLink = new char[windowLen];
		hashTable = new char[windowLen];
		
		// All three distance limits are set 261 bytes below the window length.
		int maxDist = windowLen - 261;
		maxDistanceTriplet = maxDist;
		maxDistanceSeq1 = maxDist;
		maxDistanceSeq2 = maxDist;
		
		// Level 0 selects the default strategy.
		int effectiveLevel = (level == 0) ? MEDIUM : level;
		if (effectiveLevel == HUFF) {
			// No LZ77 matching at all: only Huffman coding will be applied.
			huffOnly = true;
		}
		else if (effectiveLevel == SPEED) {
			maxChainLengthTriplet = 16;
			maxChainLengthSeq1 = 8;
			goodSequenceLength = 8;
			maxLazyLength = 4;
			maxChainLengthSeq2 = 4;
		}
		else if (effectiveLevel == MEDIUM) {
			maxChainLengthTriplet = 128;
			maxChainLengthSeq1 = 128;
			goodSequenceLength = 64;
			maxLazyLength = 64;
			maxChainLengthSeq2 = 32;
		}
		else if (effectiveLevel == COMPACT) {
			maxChainLengthTriplet = 1024;
			maxChainLengthSeq1 = 1024;
			goodSequenceLength = 258;
			maxLazyLength = 258;
			maxChainLengthSeq2 = 1024;
		}
		else {
			throw new IllegalArgumentException("unknown compression level: " + level);
		}
		buffer = new int[symbolCount];
		ucBuffer = new byte[4 * symbolCount + 258];
		outBuf = new byte[4096];
	}
	
	/**
	 * Get the current output transport stream.
	 * 
	 * @return the current output stream, or {@code null} if none has been set yet
	 */
	public OutputStream getOut() {
		return out;
	}
	
	/**
	 * Set the current output transport stream. This method must be called before compressing data. The deflater
	 * buffers compressed bytes internally, so an unbuffered transport stream is acceptable.
	 * 
	 * @param out
	 *            the new transport stream
	 */
	public void setOut(OutputStream out) {
		this.out = out;
	}
	
	/* =========================================================== */
	/*
	 * LZ77 implementation.
	 */

	/**
	 * Encode a (length, distance) pair as a 32-bit copy symbol. The packing is: literal/length symbol (9 bits),
	 * extra length bits (5 bits), distance symbol (5 bits), extra distance bits (13 bits), from least- to
	 * most-significant bits.
	 * 
	 * @param len
	 *            the sequence length
	 * @param dist
	 *            the source sequence distance
	 * @return the copy symbol
	 */
	private static int makeCopySymbol(int len, int dist) {
		// Length part: alphabet symbol plus extra bits.
		int lenSym;
		int lenExtra;
		if (len == 258) {
			// Maximum length has its own dedicated symbol with no extra bits.
			lenSym = 285;
			lenExtra = 0;
		}
		else if (len <= 10) {
			// Short lengths map one-to-one onto symbols 255..264.
			lenSym = 254 + len;
			lenExtra = 0;
		}
		else {
			// Linear scan of the length base table (a binary search would also work).
			int i = 9;
			while (i < 29 && LENGTH[i] <= len)
				i++;
			lenSym = 256 + i;
			lenExtra = len - LENGTH[i - 1];
		}
		
		// Distance part: distance symbol plus extra bits.
		int distSym;
		int distExtra;
		if (dist <= 4) {
			// Distances 1..4 map one-to-one onto symbols 0..3.
			distSym = dist - 1;
			distExtra = 0;
		}
		else {
			// Linear scan of the distance base table (a binary search would also work).
			int i = 5;
			while (i < 30 && DIST[i] <= dist)
				i++;
			distSym = i - 1;
			distExtra = dist - DIST[i - 1];
		}
		
		return lenSym + (lenExtra << 9) + (distSym << 14) + (distExtra << 19);
	}
	
	/**
	 * Find a previous sequence, identical to the sequence at index orig and length len, such
	 * that this previous sequence is continued by three bytes equal to the provided end value.
	 * IMPORTANT: the "original" sequence must actually be longer than len by two bytes,
	 * which match the high two bytes of end.
	 * 
	 * @param win
	 *            the window buffer
	 * @param winMask
	 *            the window buffer length, minus one
	 * @param wlink
	 *            the window link buffer
	 * @param orig
	 *            the original sequence begin index
	 * @param dist
	 *            the distance associated with the original sequence
	 * @param len
	 *            the original sequence length
	 * @param end
	 *            the sequence end triplet
	 * @param maxLen
	 *            the maximum explored chain length
	 * @param maxDist
	 *            the maximum accepted distance
	 * @return the previous longer sequence distance, or 0 if not found
	 */
	private static int findPreviousSequence(int[] win, int winMask, char[] wlink, int orig, int dist, int len, int end, int maxLen, int maxDist) {
		int n = orig;
		int chainLength = maxLen;
		// Walk the hash chain backwards, exploring at most maxLen candidates.
		loop: while (chainLength-- > 0) {
			int d = wlink[n];
			// A zero link marks the end of the chain.
			if (d == 0)
				return 0;
			// Links hold relative distances; accumulate them into an absolute backward distance.
			dist += d;
			if (dist > maxDist)
				return 0;
			n = (n - d) & winMask;
			int tm = len;
			int j = orig, k = n;
			if (tm >= 3) {
				// Compare the candidate against the original, one triplet (3 bytes) per step.
				while (tm >= 3) {
					if (win[j] != win[k])
						continue loop;
					tm -= 3;
					j = (j + 3) & winMask;
					k = (k + 3) & winMask;
				}
				// Handle the 1- or 2-byte tail by stepping back and re-comparing one full
				// (overlapping) triplet, since each window slot covers three bytes.
				switch (tm) {
				case 1:
					j = (j - 2) & winMask;
					k = (k - 2) & winMask;
					if (win[j] != win[k])
						continue loop;
					k = (k + 3) & winMask;
					break;
				case 2:
					j = (j - 1) & winMask;
					k = (k - 1) & winMask;
					if (win[j] != win[k])
						continue loop;
					k = (k + 3) & winMask;
					break;
				}
			}
			else {
				// Short original (len <= 2): a single triplet comparison covers it.
				if (win[j] != win[k])
					continue loop;
				k = (k + tm) & winMask;
			}
			// The bytes match; accept the candidate only if it is continued by the
			// required end triplet.
			if (win[k] == end)
				return dist;
		}
		return 0;
	}
	
	/**
	 * Update the ucBuffer array with the provided uncompressed data. This must be done before ending the
	 * block. The copy honours the circular nature of ucBuffer, wrapping at the end of the array.
	 * 
	 * @param buf
	 *            the source data buffer
	 * @param off1
	 *            the source start offset (inclusive)
	 * @param off2
	 *            the source end offset (exclusive)
	 */
	private void updateUCBuffer(byte[] buf, int off1, int off2) {
		int uLen = ucBuffer.length;
		// Copy at most half the buffer length per iteration; each chunk then wraps at
		// most once, i.e. needs at most two arraycopy() calls.
		int sInc = (uLen >>> 1);
		while (off1 < off2) {
			int free = uLen - ucBufferPtr;
			int tc = off2 - off1;
			if (tc > sInc)
				tc = sInc;
			if (tc > free) {
				// The chunk does not fit before the end of the circular buffer:
				// split it into a tail part and a wrapped head part.
				System.arraycopy(buf, off1, ucBuffer, ucBufferPtr, free);
				System.arraycopy(buf, off1 + free, ucBuffer, 0, tc - free);
				ucBufferPtr = tc - free;
			}
			else {
				System.arraycopy(buf, off1, ucBuffer, ucBufferPtr, tc);
				// NOTE(review): ucBufferPtr may momentarily equal uLen here; the next
				// iteration (or the block-ending code) appears to treat that as a full
				// wrap — confirm against endBlock().
				ucBufferPtr += tc;
			}
			off1 += tc;
		}
	}
	
	/**
	 * Process some more data. When the internal buffer is full, or when the compressor code deems it appropriate, the
	 * data is compressed and sent on the transport stream. This method is most efficient when data is input by big
	 * enough chunks (at least a dozen bytes per call).
	 * 
	 * @param buf
	 *            the data buffer
	 * @param off
	 *            the data offset
	 * @param len
	 *            the data length (in bytes)
	 * @throws IOException
	 *             on I/O error with the transport stream
	 */
	public void process(byte[] buf, int off, int len) throws IOException {
		if (len == 0)
			return;
		int origOff = off;
		
		/*
		 * If in "Huffman only" mode, then we use an alternate loop.
		 */
		if (huffOnly) {
			int[] sb = buffer;
			int sbPtr = bufferPtr;
			int sbLen = sb.length;
			while (len-- > 0) {
				int bv = buf[off++] & 0xFF;
				sb[sbPtr++] = bv;
				if (sbPtr == sbLen) {
					updateUCBuffer(buf, origOff, off);
					origOff = off;
					endBlock(false, sbPtr);
					sbPtr = 0;
				}
			}
			bufferPtr = sbPtr;
			updateUCBuffer(buf, origOff, off);
			return;
		}
		
		/*
		 * We have some special code for the first two bytes ever.
		 */
		if (windowState < 2) {
			int bv = buf[off++] & 0xFF;
			len--;
			if (windowState == 0) {
				recentBytes = bv;
				seqLen = 1;
				/*
				 * BUGFIX: record that one byte has been received. Without this
				 * assignment, a single-byte first call would return with
				 * windowState still 0, and the next call would overwrite
				 * recentBytes, silently dropping the first data byte (the field
				 * documentation requires the 0 -> 1 -> 2 state progression).
				 */
				windowState = 1;
				if (len == 0) {
					updateUCBuffer(buf, origOff, off);
					return;
				}
				bv = buf[off++] & 0xFF;
				len--;
			}
			recentBytes = (recentBytes << 8) | bv;
			windowState = 2;
			seqLen = 2;
			if (len == 0) {
				updateUCBuffer(buf, origOff, off);
				return;
			}
		}
		
		/*
		 * We cache most instance fields in local variables. This helps the JIT compiler produce efficient code.
		 */
		int[] win = window;
		int winMask = win.length - 1;
		int winPtr = windowPtr;
		int recent = recentBytes;
		char[] wlink = windowLink;
		char[] ht = hashTable;
		int htMask = ht.length - 1;
		int sLen = seqLen;
		int sPtr = seqPtr;
		int sDist = seqDist;
		int[] sb = buffer;
		int sbPtr = bufferPtr;
		int sbLen = sb.length;
		int maxCL0 = maxChainLengthTriplet;
		int maxDistCL0 = maxDistanceTriplet;
		int maxCL1 = maxChainLengthSeq1;
		int maxDistCL1 = maxDistanceSeq1;
		int goodSLen = goodSequenceLength;
		int maxLLen = maxLazyLength;
		int maxCL2 = maxChainLengthSeq2;
		int maxDistCL2 = maxDistanceSeq2;
		
		loop: while (len-- > 0) {
			int b0 = buf[off++] & 0xFF;
			
			/*
			 * Compute the current triplet.
			 */
			int triplet = (recent << 8) | b0;
			recent = triplet & 0xFFFF;
			
			/*
			 * Update the window and set hash links.
			 */
			win[winPtr] = triplet;
			int h = (triplet + (triplet >>> 4) + (triplet >>> 8) + (triplet >>> 9) - (triplet >>> 16)) & htMask;
			int link = ht[h] - 1;
			int dist;
			if (link < 0) {
				dist = 0;
			}
			else {
				// The chain head may have been overwritten by buffer reuse; re-hash it
				// to check that it still belongs to this chain.
				int pv = win[link];
				int ph = (pv + (pv >>> 4) + (pv >>> 8) + (pv >>> 9) - (pv >>> 16)) & htMask;
				if (ph == h) {
					dist = (winPtr - link) & winMask;
				}
				else {
					dist = 0;
				}
			}
			ht[h] = (char) (winPtr + 1);
			wlink[winPtr] = (char) dist;
			
			int thisPtr = winPtr;
			winPtr = (winPtr + 1) & winMask;
			
			/*
			 * If the current sequence length is 0 or 1, then we do not have a complete triplet yet, and there is
			 * nothing more to do.
			 */
			if (sLen < 2) {
				sLen++;
				continue loop;
			}
			
			/*
			 * If we just completed a triplet, then we look for a previous triplet. If we find one, then we begin a
			 * match sequence; otherwise, we emit a literal for the first byte of our triplet, and we continue. There is
			 * a corner case when the previous triplet is at the maximum distance: in that situation, we must emit the
			 * copy symbol immediately.
			 */
			if (sLen == 2) {
				if (dist == 0) {
					sb[sbPtr++] = triplet >>> 16;
					if (sbPtr == sbLen) {
						updateUCBuffer(buf, origOff, off);
						origOff = off;
						endBlock(false, sbPtr);
						sbPtr = 0;
					}
					continue loop;
				}
				sDist = dist;
				/*
				 * The do/while(false) below is a breakable scope: "break findTriplet"
				 * exits it with sDist holding the found distance; falling through the
				 * chain walk resets sDist to 0 ("not found").
				 */
				findTriplet: do {
					int n = link;
					int chainLen = maxCL0;
					while (chainLen-- > 0) {
						if (win[n] == triplet)
							break findTriplet;
						int d = wlink[n];
						if (d == 0)
							break;
						sDist += d;
						if (sDist > maxDistCL0)
							break;
						n = (n - d) & winMask;
					}
					sDist = 0;
				}
				while (false);
				if (sDist == 0) {
					sb[sbPtr++] = triplet >>> 16;
					if (sbPtr == sbLen) {
						updateUCBuffer(buf, origOff, off);
						origOff = off;
						endBlock(false, sbPtr);
						sbPtr = 0;
					}
				}
				else {
					sPtr = (thisPtr - sDist) & winMask;
					sLen = 3;
					if (sPtr == winPtr) {
						sb[sbPtr++] = makeCopySymbol(sLen, sDist);
						if (sbPtr == sbLen) {
							updateUCBuffer(buf, origOff, off);
							origOff = off;
							endBlock(false, sbPtr);
							sbPtr = 0;
						}
						sLen = 0;
					}
				}
				continue loop;
			}
			
			/*
			 * We are currently matching a sequence. We try to augment it. If we can, then we just do that; but we must
			 * mind the maximum sequence length and also its distance.
			 */
			if (win[(thisPtr - sDist) & winMask] == triplet) {
				sLen++;
				if (sPtr == winPtr || sLen == 258) {
					sb[sbPtr++] = makeCopySymbol(sLen, sDist);
					if (sbPtr == sbLen) {
						updateUCBuffer(buf, origOff, off);
						origOff = off;
						endBlock(false, sbPtr);
						sbPtr = 0;
					}
					sLen = 0;
				}
				continue loop;
			}
			
			/*
			 * The current sequence cannot be augmented. We try to find a longer match in the previous sequences.
			 */
			int sDistNew = findPreviousSequence(win, winMask, wlink, sPtr, sDist, sLen - 2, triplet, sLen > goodSLen ? (maxCL1 >>> 2) : maxCL1, maxDistCL1);
			if (sDistNew > 0) {
				sDist = sDistNew;
				sPtr = (thisPtr - (sLen - 2) - sDistNew) & winMask;
				sLen++;
				if (sPtr == winPtr || sLen == 258) {
					sb[sbPtr++] = makeCopySymbol(sLen, sDist);
					if (sbPtr == sbLen) {
						updateUCBuffer(buf, origOff, off);
						origOff = off;
						endBlock(false, sbPtr);
						sbPtr = 0;
					}
					sLen = 0;
				}
				continue loop;
			}
			
			/*
			 * We could not find a longer sequence. We must flush something. We try to find a longer sequence beginning
			 * at the next byte (that's what RFC 1951 calls lazy matching).
			 */
			if (sLen <= maxLLen) {
				int refPtr = (thisPtr - (sLen - 2) + 1) & winMask;
				int mdc = maxDistCL2;
				if (mdc > sDist)
					mdc = sDist;
				sDistNew = findPreviousSequence(win, winMask, wlink, refPtr, 0, sLen - 3, triplet, maxCL2, mdc);
				if (sDistNew > 0) {
					// Emit the first byte of the abandoned sequence as a literal,
					// then switch to the longer match found one byte later.
					sb[sbPtr++] = win[sPtr] >>> 16;
					if (sbPtr == sbLen) {
						updateUCBuffer(buf, origOff, off);
						origOff = off;
						endBlock(false, sbPtr);
						sbPtr = 0;
					}
					sDist = sDistNew;
					sPtr = (refPtr - sDistNew) & winMask;
					if (sPtr == winPtr) {
						sb[sbPtr++] = makeCopySymbol(sLen, sDist);
						if (sbPtr == sbLen) {
							updateUCBuffer(buf, origOff, off);
							origOff = off;
							endBlock(false, sbPtr);
							sbPtr = 0;
						}
						sLen = 0;
					}
					continue loop;
				}
			}
			
			/*
			 * The current sequence is finished and we could not find any better. We filter out three-byte sequences
			 * which are too far: for these sequences, the copy symbol with its distance code and extra bits may be more
			 * expensive than simply writing out the literal bytes.
			 * 
			 * The threshold for this test has been determined by compressing many files and seeing how the result
			 * compares with what gzip produces. It seems that the "right" value is 6144.
			 */
			if (sLen == 3 && sDist > 6144) {
				int ot = win[sPtr];
				for (int k = 16; k >= 0; k -= 8) {
					sb[sbPtr++] = (ot >>> k) & 0xFF;
					if (sbPtr == sbLen) {
						updateUCBuffer(buf, origOff, off);
						origOff = off;
						endBlock(false, sbPtr);
						sbPtr = 0;
					}
				}
			}
			else {
				sb[sbPtr++] = makeCopySymbol(sLen, sDist);
				if (sbPtr == sbLen) {
					updateUCBuffer(buf, origOff, off);
					origOff = off;
					endBlock(false, sbPtr);
					sbPtr = 0;
				}
			}
			sLen = 1;
		}
		
		/*
		 * Flush out the local copies of the instance fields.
		 */
		windowPtr = winPtr;
		recentBytes = recent;
		seqLen = sLen;
		seqPtr = sPtr;
		seqDist = sDist;
		bufferPtr = sbPtr;
		
		/*
		 * Do not forget the original data bytes.
		 */
		updateUCBuffer(buf, origOff, off);
	}
	
	/**
	 * Prepare for a flush / terminate: any pending (dangling) bytes or match sequence are written out as symbols,
	 * and the sequence state is reset.
	 * 
	 * @throws IOException
	 *             on I/O error with the transport stream
	 */
	private void prepareFlush() throws IOException {
		if (seqLen == 1) {
			// One pending byte: emit it as a literal.
			buffer[bufferPtr++] = recentBytes & 0xFF;
			if (bufferPtr == buffer.length)
				endBlock(false, bufferPtr);
		}
		else if (seqLen == 2) {
			// Two pending bytes: emit both as literals, oldest first.
			buffer[bufferPtr++] = (recentBytes >>> 8) & 0xFF;
			if (bufferPtr == buffer.length)
				endBlock(false, bufferPtr);
			buffer[bufferPtr++] = recentBytes & 0xFF;
			if (bufferPtr == buffer.length)
				endBlock(false, bufferPtr);
		}
		else if (seqLen != 0) {
			// A pending match (length 3 or more): emit the copy symbol.
			buffer[bufferPtr++] = makeCopySymbol(seqLen, seqDist);
			if (bufferPtr == buffer.length)
				endBlock(false, bufferPtr);
		}
		seqLen = 0;
	}
	
	/* =========================================================== */

	/*
	 * Huffman trees.
	 * 
	 * The optimal Huffman tree cannot always be used, because we have additional constraints on the code lengths. The
	 * generic algorithm is called "package-merge" but it is relatively expensive (O(Ln), where L is the maximum code
	 * length and n is the alphabet size). We rather implement a "tree tweak": we begin with the optimal Huffman tree,
	 * and then we tweak it until it fits in the length constraints (it turns out that zlib uses a very similar trick).
	 * 
	 * The tricky point is that once we have sorted the symbol by frequencies, then we just need to compute the number
	 * of codes for each code length. The actual code lengths for all symbols are easy to compute back by giving the
	 * longer codes to the least often encountered symbols (we may somehow obtain a locally optimal yet different tree,
	 * but this is no problem since in fine we will convert the tree to the canonical Huffman codes anyway). That idea
	 * apparently comes from someone called Haruhiko Okumura.
	 * 
	 * Tree tweaking is applied when the optimal tree has "overdeep" leaves. Basically, we have a number of non-leaf
	 * nodes at the maximum depth (call "p" that number of nodes). Each is the root for a subtree containing q_i leaves
	 * which are all overdeep (1 <= i <= p). If we have q = \sum_i q_i overdeep leaves, then we must relocate those q
	 * leaves: -- p leaves will use the locations held by the subtree roots at maximum depth; -- q-p leaves will have to
	 * be relocated elsewhere. Hence, a first pass is used to obtain the number of codes for all valid code lengths; in
	 * that pass, the subtree roots at maximum depth are accounted as leaves (this handles the p "natural" relocations),
	 * and the value of q-p is returned. Relocation for those values works thus: to relocate a leaf, find a valid code
	 * of length l (strictly lower than the maximum length) and replace it with two codes of length l+1: the code is
	 * lengthened by one bit, which leaves room for one brother. We iterate over all leaves to relocate, choosing each
	 * time the longest possible code.
	 * 
	 * It shall be noted that for the values used in DEFLATE, it is not possible that _all_ optimal codes exceed the
	 * maximum code length. Actually, the tweaking mechanism cannot fail with the parameters which will be used.
	 * 
	 * The sorting step uses the Heap Sort, because it is an elegant algorithm which uses O(1) additional space and has
	 * a nice O(n log n) guaranteed worst case. A QuickSort would be faster on average, but I like the Heap Sort better,
	 * and this part is not a CPU bottleneck anyway.
	 */

	/**
	 * Sort a slice of an integer array in ascending order, using an in-place heap sort. Only the elements at indexes
	 * off (inclusive) to off + len (exclusive) are read or written; the rest of the array is neither
	 * considered nor modified.
	 * 
	 * @param val
	 *            the array to sort
	 * @param off
	 *            the offset of the first element to sort
	 * @param len
	 *            the number of elements to sort
	 */
	private static void heapSort(int[] val, int off, int len) {
		if (len <= 1)
			return;
		
		/*
		 * The heap uses 1-based virtual indexes: element of virtual index "i" lives at
		 * val[base + i], and its children are at virtual indexes 2*i and 2*i+1.
		 */
		int base = off - 1;
		
		/*
		 * Phase 1: build a max-heap by inserting each element at the bottom and
		 * sifting it up towards the root.
		 */
		for (int i = 2; i <= len; i++) {
			int child = i;
			int cv = val[base + child];
			while (child > 1) {
				int parent = child >>> 1;
				int pv = val[base + parent];
				if (pv > cv)
					break;
				val[base + child] = pv;
				val[base + parent] = cv;
				child = parent;
			}
		}
		
		/*
		 * Phase 2: repeatedly swap the root (current maximum) with the last element of
		 * the shrinking heap, then sift the promoted element down to its place.
		 */
		for (int end = len; end > 1; end--) {
			int v = val[base + end];
			val[base + end] = val[base + 1];
			val[base + 1] = v;
			int node = 1;
			for (;;) {
				int left = node << 1;
				if (left >= end)
					break;
				int right = left + 1;
				int bigIdx;
				int bigVal;
				if (right >= end) {
					// Only a left child exists.
					bigIdx = left;
					bigVal = val[base + left];
				}
				else {
					// Pick the larger of the two children.
					int lv = val[base + left];
					int rv = val[base + right];
					if (lv > rv) {
						bigIdx = left;
						bigVal = lv;
					}
					else {
						bigIdx = right;
						bigVal = rv;
					}
				}
				if (v > bigVal)
					break;
				val[base + node] = bigVal;
				val[base + bigIdx] = v;
				node = bigIdx;
			}
		}
	}
	
	/**
	 * 

* Compute the optimal Huffman tree, then tweak it so that it fits within the specified maximum code length. * Returned value is the resulting code length for each symbol. From these lengths, the canonical Huffman code can * be rebuilt. *

* *

* The alphabet size is assumed to be equal to freq.length. *

* *

* The following properties MUST be met: *

    *
  • frequencies are positive integers (0 is allowed, and qualifies a symbol which does not occur at all);
  • *
  • the sum of all frequencies must be strictly lower than 2097152 (i.e. that sum must fit in a 21-bit unsigned * integer field).
  • *
*

* * @param freq * the symbol frequencies * @param maxCodeLen * the maximum code length * @return the resulting code lengths */ private static int[] makeHuffmanCodes(int[] freq, int maxCodeLen) { int alphLen = freq.length; /* * We sort symbols by frequencies. Each value in freqTmp[] consists of: -- symbol value (9 bits) -- flag * "literal" set (1 bit, equal to 1) -- symbol frequency (21 bits) * * Since the frequencies use the upper bits, we can compare those values directly. A side effect is that no two * values will be considered as equal to each other, even for two symbols which occur with the same frequency; * this is harmless. */ int[] freqTmp = new int[alphLen]; for (int i = 0; i < alphLen; i++) freqTmp[i] = i + (1 << 9) + (freq[i] << 10); heapSort(freqTmp, 0, alphLen); /* * We skip the values with frequency zero; they will not take part in the tree construction. We handle * immediately the special case where only one symbol occurs. */ int freqTmpPtr = 0; while (freqTmpPtr < alphLen && (freqTmp[freqTmpPtr] >>> 10) == 0) freqTmpPtr++; if (freqTmpPtr == alphLen) { /* * No symbol occurs (degenerate case). */ return new int[alphLen]; } if (freqTmpPtr == (alphLen - 1)) { /* * Only onw symbol occurs. */ int[] clen = new int[alphLen]; clen[freqTmp[freqTmpPtr] & 0x1FF] = 1; return clen; } /* * We use an additional FIFO in which we store the built tree nodes. All nodes make it to that fifo, and we do * not move elements inside it; rather, we shift the limit indexes. There are at most alphLen-1 nodes in the * tree (possibly less if some symbols are not used). */ int[] fifo = new int[alphLen - 1]; int fifoHead = 0, fifoRear = 0; /* * The tree is stored in an array. Each array element specifies a non-leaf node and consists of two 10-bit * values, for the left and right node children, respectively. Each such 10-bit value specifies either a leaf * symbol (9-bit value, 10th bit set) or the index for a non-leaf child node (9-bit index, 10th bit cleared). 
* The tree root index is stored in rootIndex when the tree is finished. * * The tree is complete (no missing child anywhere) and has at most alphLen leaves (since we skip symbols which * do not occur, the actual tree can be smaller); hence, it may have up to alphLen-1 non-leaf nodes. */ int[] tree = new int[alphLen - 1]; int treePtr = 0; int rootIndex; /* * Tree building uses the classical Huffman code construction algorithm. We have two queues, the one in * freqTmp[] (the yet unprocessed leaves) and the one in fifo[] (the built subtrees). At each step, we take the * two least frequent elements (not necessarily one from each queue) and assemble them into a new subtree, which * we insert in the fifo. The algorithm ends when freqTmp[] is empty (all symbols attached) and fifo[] contains * a single subtree (which is the complete tree). */ for (;;) { if (freqTmpPtr == alphLen && fifoRear == (fifoHead + 1)) { rootIndex = fifo[fifoHead] & 0x1FF; break; } int n0, n1; if (fifoRear == fifoHead) { n0 = freqTmp[freqTmpPtr++]; n1 = freqTmp[freqTmpPtr++]; } else if (freqTmpPtr == alphLen) { n0 = fifo[fifoHead++]; n1 = fifo[fifoHead++]; } else { int f = fifo[fifoHead]; int q = freqTmp[freqTmpPtr]; if (f < q) { n0 = f; fifoHead++; } else { n0 = q; freqTmpPtr++; } if (fifoHead == fifoRear) { n1 = freqTmp[freqTmpPtr++]; } else if (freqTmpPtr == alphLen) { n1 = fifo[fifoHead++]; } else { f = fifo[fifoHead]; q = freqTmp[freqTmpPtr]; if (f < q) { n1 = f; fifoHead++; } else { n1 = q; freqTmpPtr++; } } } int ni = (n0 & ~0x3FF) + (n1 & ~0x3FF) + treePtr; fifo[fifoRear++] = ni; int nv = (n0 & 0x3FF) + ((n1 & 0x3FF) << 10); tree[treePtr++] = nv; } /* * Now, we gather the count for each code length, and the number of overdeep leaves which must be relocated. */ int[] blCount = new int[maxCodeLen + 1]; int overdeep = getCodeLengths(tree, rootIndex, 0, blCount, maxCodeLen); /* * We relocate the overdeep leaves. * * "dpi" contains the code length where we find nodes to demote (i.e. 
for which we lengthen the code) in order * to find some room for a relocated leaf. That length is kept as long as possible, provided that it is strictly * lower than maxCodeLen, and that there remain codes with that length. */ int dpi = maxCodeLen; while (overdeep-- > 0) { if (dpi == maxCodeLen) { do { dpi--; } while (blCount[dpi] == 0); } blCount[dpi]--; blCount[++dpi] += 2; } /* * We rebuild the code lengths. We just walk the symbols sorted by frequencies, and give them code lengths, * according to the counts in blCount[]. */ int[] codeLen = new int[alphLen]; int p = 0; while ((freqTmp[p] >>> 10) == 0) p++; for (int bits = maxCodeLen; bits > 0; bits--) { for (int k = blCount[bits]; k > 0; k--) { int sym = freqTmp[p++] & 0x1FF; codeLen[sym] = bits; } } /* * We have finished: we computed the code lengths for all symbols. These code lengths are for an optimized (not * necessarily optimal) Huffman tree which fits in the allowed maximum code length. */ return codeLen; } /** * Count the code lengths for the provided subtree; the counts are accumulated in the blCount[] array. * Non-leaf nodes at exactly the maximum depth are accounted as leaves with the maximum code length. The number of * overdeep leaves which must be relocated is returned. 
* * @param tree * the tree array * @param idx * the subtree root index (or leaf code) * @param depth * the subtree root depth * @param blCount * the code length count accumulator array * @param maxCodeLen * the maximum code length * @return the number of overdeep leaves to relocate */ private static int getCodeLengths(int[] tree, int idx, int depth, int[] blCount, int maxCodeLen) { if ((idx & 0x200) != 0) { if (depth > maxCodeLen) { return 1; } else { blCount[depth]++; return 0; } } int s; if (depth == maxCodeLen) { blCount[maxCodeLen]++; s = -1; } else { s = 0; } int n = tree[idx]; int l = n & 0x3FF; int r = (n >>> 10) & 0x3FF; s += getCodeLengths(tree, l, depth + 1, blCount, maxCodeLen); s += getCodeLengths(tree, r, depth + 1, blCount, maxCodeLen); return s; } /** * The fixed Huffman codes, for the literal+length alphabet. */ private static final int[] FIXED_LIT_CODE; static { int[] fixedLitCodeLen = new int[288]; for (int i = 0; i < 144; i++) fixedLitCodeLen[i] = 8; for (int i = 144; i < 256; i++) fixedLitCodeLen[i] = 9; for (int i = 256; i < 280; i++) fixedLitCodeLen[i] = 7; for (int i = 280; i < 288; i++) fixedLitCodeLen[i] = 8; FIXED_LIT_CODE = makeCanonicalHuff(fixedLitCodeLen, 15); } /** * The fixed Huffman codes, for the distance alphabet. */ private static final int[] FIXED_DIST_CODE; static { int[] fixedDistCodeLen = new int[32]; for (int i = 0; i < 32; i++) fixedDistCodeLen[i] = 5; FIXED_DIST_CODE = makeCanonicalHuff(fixedDistCodeLen, 15); } /** * RLE-compress two Huffman trees (for the literal+length and distance alphabets, represented as arrays of code * lengths). This results in values from 0 to 18 (5 low bits), where values 16, 17 and 18 have extra bits (bit 5 and * beyond). The frequencies for the resulting values are also accumulated in the freq[] array. 
* * @param tree1 * the first tree * @param tree1len * the first tree actual length * @param tree2 * the second tree * @param tree2len * the second tree actual length * @param freq * an array which receives frequencies * @return the compressed trees */ private static int[] compressTrees(int[] tree1, int tree1len, int[] tree2, int tree2len, int[] freq) { int inLen = tree1len + tree2len; int[] in = new int[inLen]; System.arraycopy(tree1, 0, in, 0, tree1len); System.arraycopy(tree2, 0, in, tree1len, tree2len); int ptr = 0; int[] ct = new int[inLen]; int ctPtr = 0; while (ptr < inLen) { int v = in[ptr++]; if (v == 0) { int r = 1; while (r < 138 && ptr < inLen) { if (in[ptr] != 0) break; r++; ptr++; } switch (r) { case 1: ct[ctPtr++] = 0; freq[0]++; break; case 2: ct[ctPtr++] = 0; ct[ctPtr++] = 0; freq[0] += 2; break; default: if (r <= 10) { ct[ctPtr++] = 17 + ((r - 3) << 5); freq[17]++; } else { ct[ctPtr++] = 18 + ((r - 11) << 5); freq[18]++; } break; } } else { int r = 0; while (r < 6 && ptr < inLen) { if (in[ptr] != v) break; r++; ptr++; } ct[ctPtr++] = v; freq[v]++; switch (r) { case 0: break; case 1: ct[ctPtr++] = v; freq[v]++; break; case 2: ct[ctPtr++] = v; ct[ctPtr++] = v; freq[v] += 2; break; default: ct[ctPtr++] = 16 + ((r - 3) << 5); freq[16]++; break; } } } int[] res = new int[ctPtr]; System.arraycopy(ct, 0, res, 0, ctPtr); return res; } /** * This array encodes the permutation for the values encoding the RLE-compressed trees. */ /* =========================================================== */ /* * Block assembly and data output. * * The LZ77 code assembles symbols in the buffer[] array. We compute appropriate Huffman trees, and then produce the * compressed blocks. We first get all frequencies, compute the dynamic Huffman trees, and deduce the compressed * block size. We also use the frequencies to know what size we would get with the fixed Huffman trees. 
We compare * these two sizes with each other, and with the size for uncompressed blocks; we will use whichever method yields * the smallest size. * * When producing an uncompressed block, we need to rebuild the uncompressed data. The ucBuffer[] array contains the * first uncompressed bytes for this block; the subsequent bytes can be deduced by using the buffered symbols. * * Technically, we could use the same technique for type 2 blocks, in order to know whether a given short sequence * is worth copying. But this is tricky because if we decide to switch symbols at that time, then the Huffman codes * themselves have been computed with "wrong" frequencies. Right now, we filter out those sequences heuristically in * the LZ77 stage. */ /** * Write out some bits (least significant bit exits first). * * @param val * the bit values * @param num * the number of bits (possibly 0) * @throws IOException * on I/O error with the transport stream */ private void writeBits(int val, int num) throws IOException { if (num == 0) return; for (;;) { int fs = 8 - outPtr; int v = outByte | (val << outPtr); if (fs > num) { outByte = v; outPtr += num; return; } else if (fs == num) { outBuf[outBufPtr++] = (byte) v; if (outBufPtr == outBuf.length) { out.write(outBuf); outBufPtr = 0; } outByte = 0; outPtr = 0; return; } outBuf[outBufPtr++] = (byte) v; if (outBufPtr == outBuf.length) { out.write(outBuf); outBufPtr = 0; } val >>>= fs; num -= fs; outByte = 0; outPtr = 0; } } /** * Send buffered complete output bytes. * * @throws IOException * on I/O error with the transport stream */ private void sendBuffered() throws IOException { if (outBufPtr > 0) { out.write(outBuf, 0, outBufPtr); outBufPtr = 0; } } /** * End the current block and compress it. The bufferPtr field value may be incorrect; the value * provided in the sbPtr parameter must be used instead. This method resets the bufferPtr * field to 0. 
* * @param fin * true for the final block * @param sbPtr * the correct value for bufferPtr * @throws IOException * on I/O error with the transport stream */ private void endBlock(boolean fin, int sbPtr) throws IOException { if (noOutput) { bufferPtr = 0; return; } int[] sb = buffer; int[] freqLit = new int[286]; int[] freqDist = new int[30]; /* * Do not forget the EOB symbol. */ freqLit[256] = 1; /* * Get the uncompressed data length; also, gather frequencies. */ int csU = 0; for (int i = 0; i < sbPtr; i++) { int val = sb[i]; int sym = val & 0x1FF; freqLit[sym]++; if (sym < 256) { csU += 8; continue; } csU += 8 * (LENGTH[sym - 257] + ((val >>> 9) & 0x1F)); int dist = (val >>> 14) & 0x1F; freqDist[dist]++; } /* * Adjust csU to account for the header bytes. Also, save the uncompressed data length (in bytes) in uDataLen. */ int csUextra = 0; for (int t = 0; t < csU; t += 65535) { if (t == 0) { if (outPtr > 5) { csUextra = 48 - outPtr; } else { csUextra = 40 - outPtr; } } else { csUextra += 40; } } int uDataLen = (csU >>> 3); csU += csUextra; /* * Compute the dynamic Huffman codes and get the lengths for dynamic and fixed codes. */ Huff huff = new Huff(freqLit, freqDist); int csD = huff.getDynamicBitLength(); int csF = huff.getFixedBitLength(); /* * We now have the bit lengths for uncompressed blocks (csU), fixed Huffman codes (csF) and dynamic Huffman * codes (csD). We use the smallest. On equality, we prefer uncompressed blocks over Huffman codes, and fixed * Huffman codes over dynamic Huffman codes. */ if (csU <= csF && csU <= csD) { writeBits(fin ? 1 : 0, 3); if (outPtr > 0) writeBits(0, 8 - outPtr); writeBits(uDataLen | (~uDataLen << 16), 32); sendBuffered(); out.write(ucBuffer, 0, uDataLen); } else if (csF <= csD) { /* * Fixed Huffman codes. */ /* * Block header (3 bits). */ writeBits(fin ? 3 : 2, 3); /* * Now, write out the data. 
*/ for (int i = 0; i < sbPtr; i++) { int val = buffer[i]; int sym = val & 0x1FF; if (sym < 256) { writeBits(FIXED_LIT_CODE[sym], sym < 144 ? 8 : 9); continue; } writeBits(FIXED_LIT_CODE[sym], sym < 280 ? 7 : 8); int eLenNum = LENGTH_ENUM[sym - 257]; if (eLenNum > 0) writeBits((val >>> 9) & 0x1F, eLenNum); int dist = (val >>> 14) & 0x1F; writeBits(FIXED_DIST_CODE[dist], 5); int eDistNum = DIST_ENUM[dist]; if (eDistNum > 0) writeBits(val >>> 19, eDistNum); } /* * Write out the EOB. */ writeBits(FIXED_LIT_CODE[256], 7); } else { /* * Dynamic Huffman codes. */ int[] litCode = huff.getLitCode(); int[] litCodeLen = huff.getLitCodeLen(); int[] distCode = huff.getDistCode(); int[] distCodeLen = huff.getDistCodeLen(); int[] compTrees = huff.getCompTrees(); int compTreesLen = compTrees.length; int[] ctCode = huff.getCTCode(); int[] ctCodeLen = huff.getCTCodeLen(); int[] permCT = huff.getPermCT(); int permCTLen = permCT.length; /* * Block header (3 bits). */ writeBits(fin ? 5 : 4, 3); /* * The tree lengths. */ writeBits(litCode.length - 257, 5); writeBits(distCode.length - 1, 5); writeBits(permCTLen - 4, 4); /* * The CT tree. */ for (int i = 0; i < permCTLen; i++) writeBits(permCT[i], 3); /* * The two compressed trees. */ for (int i = 0; i < compTreesLen; i++) { int v = compTrees[i]; int s = v & 0x1F; writeBits(ctCode[s], ctCodeLen[s]); int ebits; switch (s) { case 16: ebits = 2; break; case 17: ebits = 3; break; case 18: ebits = 7; break; default: continue; } writeBits((v >>> 5), ebits); } /* * Now, write out the data. */ for (int i = 0; i < sbPtr; i++) { int val = buffer[i]; int sym = val & 0x1FF; writeBits(litCode[sym], litCodeLen[sym]); if (sym < 256) continue; int eLenNum = LENGTH_ENUM[sym - 257]; if (eLenNum > 0) writeBits((val >>> 9) & 0x1F, eLenNum); int dist = (val >>> 14) & 0x1F; writeBits(distCode[dist], distCodeLen[dist]); int eDistNum = DIST_ENUM[dist]; if (eDistNum > 0) writeBits(val >>> 19, eDistNum); } /* * Write out the EOB. 
*/ writeBits(litCode[256], litCodeLen[256]); } sendBuffered(); bufferPtr = 0; /* * Adjust ucBuffer[]. Some data has been processed, but some may remain (at most 258 bytes, corresponding to the * currently matched sequence). We must take care: the buffer is circular. We use the fact that the buffer is * more than twice as large than the maximum amout of data we move around. */ int uLen = ucBuffer.length; int uRealLen = ucBufferPtr; while (uRealLen < uDataLen) uRealLen += uLen; if (uDataLen < uRealLen) { int tm = uRealLen - uDataLen; /* DEBUG */ if (tm > 258) throw new Error("too much data: " + tm); if (tm <= ucBufferPtr) { System.arraycopy(ucBuffer, ucBufferPtr - tm, ucBuffer, 0, tm); } else { int fpl = tm - ucBufferPtr; System.arraycopy(ucBuffer, 0, ucBuffer, fpl, ucBufferPtr); System.arraycopy(ucBuffer, uLen - fpl, ucBuffer, 0, fpl); } } ucBufferPtr = uRealLen - uDataLen; } /** * Instances of this class compute the dynamic Huffman codes for some frequencies, and report the resulting length, * for both dynamic and static codes. */ private static class Huff { private int[] litCode, litCodeLen; private int[] distCode, distCodeLen; private int[] compTrees; private int[] ctCode, ctCodeLen; private int[] permCT; private int csD, csF; /** * Build the instance with the provided frequencies for the literal+length and the distance alphabets. The first * frequency array MUST include the value 1 for the EOB symbol (value 256). * * @param freqLit * the literal+length frequencies * @param freqDist * the distance frequencies */ private Huff(int[] freqLit, int[] freqDist) { csD = 17; csF = 3; litCodeLen = makeHuffmanCodes(freqLit, 15); distCodeLen = makeHuffmanCodes(freqDist, 15); for (int i = 0; i < litCodeLen.length; i++) { int f = freqLit[i]; int elen; elen = (i >= 257) ? LENGTH_ENUM[i - 257] : 0; csD += (litCodeLen[i] + elen) * f; int fcl; if (i < 256) { fcl = (i < 144) ? 8 : 9; } else { fcl = (i < 280) ? 
7 : 8; } csF += (fcl + elen) * f; } for (int i = 0; i < distCodeLen.length; i++) { int f = freqDist[i]; int edist = DIST_ENUM[i]; csD += (distCodeLen[i] + edist) * f; csF += (5 + edist) * f; } /* * RLE-compress the two codes. */ litCode = makeCanonicalHuff(litCodeLen, 15); distCode = makeCanonicalHuff(distCodeLen, 15); if (distCode.length == 0) distCode = new int[1]; int[] freqCT = new int[19]; compTrees = compressTrees(litCodeLen, litCode.length, distCodeLen, distCode.length, freqCT); /* * Compute the Huffman tree for the RLE-compressed trees. */ ctCodeLen = makeHuffmanCodes(freqCT, 7); ctCode = makeCanonicalHuff(ctCodeLen, 7); for (int i = 0; i < 19; i++) { int ccl = ctCodeLen[i]; switch (i) { case 16: ccl += 2; break; case 17: ccl += 3; break; case 18: ccl += 7; break; } csD += freqCT[i] * ccl; } /* * Compute the permuted tree for the RLE-compressed trees, and its minimal length. */ int[] permCTtmp = new int[19]; int permCTLen = 0; for (int i = 0; i < 19; i++) { int len = ctCodeLen[PERM_CT[i]]; if (len > 0) permCTLen = i + 1; permCTtmp[i] = len; } permCT = new int[permCTLen]; System.arraycopy(permCTtmp, 0, permCT, 0, permCTLen); csD += 3 * permCTLen; } /** * Get the literal code values. * * @return the literal code values */ private int[] getLitCode() { return litCode; } /** * Get the literal code lengths. * * @return the literal code lengths */ private int[] getLitCodeLen() { return litCodeLen; } /** * Get the distance code lengths. * * @return the distance code lengths */ private int[] getDistCode() { return distCode; } /** * Get the distance code lengths. * * @return the distance code lengths */ private int[] getDistCodeLen() { return distCodeLen; } /** * Get the RLE-compressed tree representation. * * @return the RLE-compressed representation */ private int[] getCompTrees() { return compTrees; } /** * Get the level-2 code values. * * @return the level-2 code values */ private int[] getCTCode() { return ctCode; } /** * Get the level-2 code lengths. 
* * @return the level-2 code lengths */ private int[] getCTCodeLen() { return ctCodeLen; } /** * Get the level-2 code lengths, permuted. * * @return the permuted level-2 code lengths */ private int[] getPermCT() { return permCT; } /** * Get the block length, in bits, if dynamic Huffman codes are used. * * @return the block length with dynamic codes */ private int getDynamicBitLength() { return csD; } /** * Get the block length, in bits, if fixed Huffman codes are used. * * @return the block length with fixed codes */ private int getFixedBitLength() { return csF; } } /* =========================================================== */ /* * Flush handling. * * We need some handling for the end of stream. If we have some buffered data, then we may just end the current * block with the "final" flag set; otherwise, we need a new empty block to set that flag. * * We also implement two flush modes. The "partial flush" mimics what zlib does with Z_PARTIAL_FLUSH. Recent * versions of zlib deprecate that option, and do not document it any more, but it is needed for the implementation * of some protocols, e.g. OpenSSH. With this flush mode, we add one or two empty blocks (with fixed Huffman trees), * in order to make sure that the peer has enough compressed data to decompress all meaningful bytes. Whether we * need to add one or two blocks depends on a computation, which hinges on the idea that zlib uses 9 bits of * lookahead. * * The "sync flush" terminates the current block (if any) and appends an empty "uncompressed data" block. That block * includes automatic byte alignment, and ends with the four-byte sequence 00 00 FF FF. A common convention is _not * to include_ that four-byte sequence, in which case the receiver is responsible for adding them. PPP uses this * convention (see RFC 1979). We implement this mode, using a parameter flag to decide whether the four-byte * sequence must be included or not. */ /** * Write out an empty type 1 block (fixed Huffman trees). 
* * @param fin * true for a final block * @throws IOException * on I/O error with the transport stream */ private void writeEmptySH(boolean fin) throws IOException { writeBits(fin ? 3 : 2, 10); } /** * Write out an empty type 0 block (uncompressed data). * * @param fin * true for a final block * @param wd * false to omit the 00 00 FF FF sequence * @throws IOException * on I/O error with the transport stream */ private void writeEmptyUD(boolean fin, boolean wd) throws IOException { writeBits(fin ? 1 : 0, 3); if (outPtr > 0) writeBits(0, 8 - outPtr); if (wd) writeBits(0xFFFF0000, 32); } /** * Terminate the current compression run. Pending buffered data, if any, is compressed as a final block, and written * out on the transport stream. If there is no pending buffered data, then an empty, final block is added. Either * way, any remaining partial byte is padded with zeroes and written. The transport stream is NOT flushed. * * @throws IOException * on I/O error with the transport stream */ public void terminate() throws IOException { prepareFlush(); if (bufferPtr == 0) { writeEmptySH(true); } else { endBlock(true, bufferPtr); } if (outPtr > 0) writeBits(0, 8 - outPtr); sendBuffered(); } /** * Perform a "sync flush" in a way similar to what is done by zlib with option Z_SYNC_FLUSH. The * current block, if any, is closed, and one empty type 0 block is added. After this call, the stream is * byte-aligned. The type 0 block ends with the aligned four-byte sequence 00 00 FF FF; these four bytes are omitted * if withData is false. The transport stream is NOT flushed. * * @param withData * false to omit the 00 00 FF FF bytes * @throws IOException * on I/O error with the transport stream */ public void flushSync(boolean withData) throws IOException { prepareFlush(); if (bufferPtr != 0) endBlock(false, bufferPtr); writeEmptyUD(false, withData); sendBuffered(); } /** * LENGTH[n] contains the sequence copy length * when the symbol 257+n has been read. 
The actual * copy length may be augmented with a value from some extra bits. */ static final int[] LENGTH; /** * LENGTH_ENUM[n] contains the number of extra bits * which shall be read, in order to augment the sequence copy * length corresponding to the symbol257+n. */ static final int[] LENGTH_ENUM = { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0 }; /* * Here, we initialize LENGTH[] from LENGTH_ENUM[]. */ static { LENGTH = new int[29]; LENGTH[0] = 3; int l = 3; for (int i = 1; i < 28; i ++) { l += 1 << LENGTH_ENUM[i - 1]; LENGTH[i] = l; } /* * The RFC 1951 specifies that the last symbol specifies * a copy length of 258, not 259. I don't know why. */ LENGTH[28] = 258; } /** * DIST[n] is the copy sequence distance corresponding * to the distance symbol n, possibly augmented by * some extra bits. */ static final int[] DIST; /** * DIST_ENUM[n] contains the number of extra bits * used to augment the copy sequence distance corresponding to * the distance symbol n. */ static final int[] DIST_ENUM = { 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13 }; /* * DIST[] is initialized from DIST_ENUM[]. */ static { DIST = new int[30]; DIST[0] = 1; int d = 1; for (int i = 1; i < 30; i ++) { d += 1 << DIST_ENUM[i - 1]; DIST[i] = d; } } /** * This array encodes the permutation for the values encoding * the RLE-compressed trees. */ static final int[] PERM_CT = { 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 }; /** * Build the canonical Huffman codes, given the length of each * code. null is returned if the code is not * correct. The returned array is trimmed to its minimal size * (trailing codes which do not occur are removed). The codes * are "reversed" (first bit is least significant). 
* * @param codeLen the code lengths * @param maxCodeLen the maximum code length * @return the codes, or null */ static int[] makeCanonicalHuff(int[] codeLen, int maxCodeLen) { int alphLen = codeLen.length; int actualAlphLen = 0; /* * Compute the number of codes for each length * (by convention, there is no code of length 0). */ int[] blCount = new int[maxCodeLen + 1]; for (int n = 0; n < alphLen; n ++) { int len = codeLen[n]; if (len < 0 || len > maxCodeLen) return null; if (len > 0) { actualAlphLen = n + 1; blCount[len] ++; } } /* * Compute the smallest code for each code length. */ int[] nextCode = new int[maxCodeLen + 1]; int codeVal = 0; for (int bits = 1; bits <= maxCodeLen; bits ++) { codeVal = (codeVal + blCount[bits - 1]) << 1; nextCode[bits] = codeVal; } /* * Compute the code itself for each synbol. We also * count the number of distinct symbols which may appear. */ int[] code = new int[actualAlphLen]; for (int n = 0; n < actualAlphLen; n ++) { int len = codeLen[n]; if (len != 0) { int w = nextCode[len]; if (w >= (1 << len)) return null; code[n] = reverse(w, len); nextCode[len] = w + 1; } } return code; } /** * Bit reverse a value. * * @param cc the value to reverse * @param q the value length, in bits * @return the reversed value */ private static int reverse(int cc, int q) { int v = 0; while (q -- > 0) { v <<= 1; if ((cc & 1) != 0) v ++; cc >>>= 1; } return v; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy