org.apache.poi.hdgf.HDGFLZWCompressor Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of xylophone Show documentation
Show all versions of xylophone Show documentation
xylophone - LGPL Java library
The newest version!
/* ====================================================================
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==================================================================== */
package org.apache.poi.hdgf;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
/**
* Helper class to handle the Visio compatible
* streaming LZW compression.
* Need our own class to handle keeping track of the
* code buffer, pending bytes to write out etc.
*
* TODO Fix this, as it starts to go wrong on
* large streams
*/
/* package */ final class HDGFLZWCompressor {
// We use 12 bit codes:
// * 0-255 are real bytes
// * 256-4095 are the substring codes
// Java handily initialises our buffer / dictionary
// to all zeros
private byte[] dict = new byte[4096];
// The next block of data to be written out, minus
// its mask byte
private byte[] buffer = new byte[16];
// And how long it is
// (Un-compressed codes are 1 byte each, compressed codes
// are two)
private int bufferLen = 0;
// The raw length of a code is limited to 4 bits + 2
private byte[] rawCode = new byte[18];
// And how much we're using
private int rawCodeLen = 0;
// How far through the input and output streams we are
private int posInp = 0;
private int posOut = 0;
// What the next mask byte to output will be
private int nextMask = 0;
// And how many bits we've already set
private int maskBitsSet = 0;
public HDGFLZWCompressor() {}
/**
* Returns the last place that the bytes from rawCode are found
* at in the buffer, or -1 if they can't be found
*/
private int findRawCodeInBuffer() {
// Work our way through all the codes until we
// find the right one. Visio starts from the end
for(int i=4096-rawCodeLen; i>0; i--) {
boolean matches = true;
for(int j=0; matches && j> 4);
buffer[bufferLen] = HDGFLZW.fromInt(bp1);
bufferLen++;
buffer[bufferLen] = HDGFLZW.fromInt(bp2);
bufferLen++;
// Copy the data to the dictionary in the new place
for(int i=0; i 0) {
outputCompressed(res);
if(maskBitsSet > 0) {
output8Codes(res);
}
}
break;
}
// Try adding this new byte onto rawCode, and
// see if all of that is still found in the
// buffer dictionary or not
rawCode[rawCodeLen] = dataB;
rawCodeLen++;
int rawAt = findRawCodeInBuffer();
// If we found it and are now at 18 bytes,
// we need to output our pending code block
if(rawCodeLen == 18 && rawAt > -1) {
outputCompressed(res);
rawCodeLen = 0;
continue;
}
// If we did find all of rawCode with our new
// byte added on, we can wait to see what happens
// with the next byte
if(rawAt > -1) {
continue;
}
// If we get here, then the rawCode + this byte weren't
// found in the dictionary
// If there was something in rawCode before, then that was
// found in the dictionary, so output that compressed
rawCodeLen--;
if(rawCodeLen > 0) {
// Output the old rawCode
outputCompressed(res);
// Can this byte start a new rawCode, or does
// it need outputting itself?
rawCode[0] = dataB;
rawCodeLen = 1;
if(findRawCodeInBuffer() > -1) {
// Fits in, wait for next byte
continue;
}
// Doesn't fit, output
outputUncompressed(dataB,res);
rawCodeLen = 0;
} else {
// Nothing in rawCode before, so this byte
// isn't in the buffer dictionary
// Output it un-compressed
outputUncompressed(dataB,res);
}
}
}
}