org.fuzzydb.util.DeflatedString Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of org.fuzzydb.util Show documentation
Show all versions of org.fuzzydb.util Show documentation
Contains classes not specific to fuzzydb implementation which
could be used in any implementation of fuzzy matching, or as
general utility classes such as those in the geo package.
The newest version!
/******************************************************************************
* Copyright (c) 2004-2008 Whirlwind Match Limited. All rights reserved.
*
* This is open source software; you can use, redistribute and/or modify
* it under the terms of the Open Software Licence v 3.0 as published by the
* Open Source Initiative.
*
* You should have received a copy of the Open Software Licence along with this
* application. if not, contact the Open Source Initiative (www.opensource.org)
*****************************************************************************/
package org.fuzzydb.util;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.zip.DataFormatException;
import java.util.zip.DeflaterOutputStream;
import java.util.zip.InflaterInputStream;
/**
* A compressed String object. The static methods have best performance.
*
* Typical savings for 5000 short strings (savings are greater when the strings are longer):
*
* Normal (String)
* 4MB storage
*
* Deflated (DeflatedString) - toString() works
* 2.4MB 60%
*
* Encoded (byte[]) - toString and runtime types unavailable
* 1.8MB 45%
*
* @author ac
*
*/
public class DeflatedString {

    /**
     * Strings of this many characters or fewer are never deflated; they are
     * stored as plain UTF-8 to avoid paying for compression that rarely helps.
     */
    private static final int NO_COMPRESSION_LENGTH = 50;

    /** Tag byte: the remaining bytes are the plain UTF-8 encoding of the string. */
    private static final byte UTF8_CODED = 0;

    /** Tag byte: the remaining bytes are the UTF-8 encoding written through a DeflaterOutputStream. */
    private static final byte DEFLATE_CODED = 1;

    /** Tag byte: the value is the empty string; no payload follows. */
    private static final byte EMPTY_CODED = 2;

    private static final String EMPTY_STRING = "";

    /** Charset name used for every String/byte conversion in this class. */
    private static final String CHARSET_NAME = "UTF8";

    /**
     * The encoded value: one tag byte followed by the payload, or null when the
     * value is null. Package-private, so it may be assigned from outside this class.
     */
    byte[] encodedData;

    /** Construct a DeflatedString holding a null value. */
    public DeflatedString() {
        encodedData = null;
    }

    /**
     * Construct a new DeflatedString with the specified value.
     * @param value The String to encode, may be null
     */
    public DeflatedString(String value) {
        set(value);
    }

    /**
     * Encode the specified string to the DeflatedString format.
     * The result is a tag byte followed by either the raw UTF-8 bytes or the
     * deflated UTF-8 bytes, whichever is shorter. Strings of at most
     * {@value #NO_COMPRESSION_LENGTH} characters are never deflated.
     * @param value The String to encode
     * @return A byte array coded with DeflatedString's internal format, or null if value is null
     */
    public static byte[] encode(String value) {
        if (value == null) {
            return null;
        }
        if (value.length() == 0) {
            return new byte[] { EMPTY_CODED };
        }

        byte[] utf8;
        try {
            utf8 = value.getBytes(CHARSET_NAME);
        } catch (UnsupportedEncodingException e) {
            // Can't normally happen
            throw new RuntimeException("Fatal error in DeflatedString, UTF8 coding not supported", e);
        }

        if (value.length() > NO_COMPRESSION_LENGTH) {
            byte[] deflated = deflate(utf8);
            // Only keep the compressed form when it is genuinely smaller than tag + raw bytes.
            if (deflated.length < utf8.length + 1) {
                return deflated;
            }
        }

        byte[] plain = new byte[utf8.length + 1];
        plain[0] = UTF8_CODED;
        System.arraycopy(utf8, 0, plain, 1, utf8.length);
        return plain;
    }

    /**
     * Get the raw encoded data.
     * The internal array itself is returned, not a copy, so modifying it changes
     * the value held by this object.
     * @return A byte array coded with DeflatedString's internal format, or null if the value is null
     */
    public byte[] getCoded() {
        return encodedData;
    }

    /**
     * Decode the byte array to a String. The byte array must be in DeflatedString format, or else an exception is thrown.
     * @param data The coded data to decode
     * @return A String, or null if data is null
     * @throws DataFormatException The data is not in the correct format. When the failure came from
     *         the inflater, the underlying IOException is available through {@link Throwable#getCause()}.
     * @see #getCoded()
     */
    public static String decode(byte[] data) throws DataFormatException {
        if (data == null) {
            return null;
        }
        if (data.length < 1) {
            throw new DataFormatException("Coded data is empty");
        }
        if (data[0] == EMPTY_CODED) {
            return EMPTY_STRING;
        }
        if (data.length < 2) {
            throw new DataFormatException("Coded data has no payload");
        }

        try {
            if (data[0] == UTF8_CODED) {
                return new String(data, 1, data.length - 1, CHARSET_NAME);
            }
            if (data[0] == DEFLATE_CODED) {
                return new String(inflate(data), CHARSET_NAME);
            }
        } catch (UnsupportedEncodingException e) {
            // Can't normally happen
            throw new RuntimeException("Fatal error in DeflatedString, UTF8 coding not supported", e);
        }
        throw new DataFormatException("Unknown coding type");
    }

    /**
     * Determine if real compression was used to encode the string.
     * @return true if the internal format is deflated, false if it is UTF8, null, or empty coded
     */
    public boolean isCompressed() {
        // The length check guards against an empty array assigned directly to the field.
        return encodedData != null && encodedData.length > 0 && encodedData[0] == DEFLATE_CODED;
    }

    /**
     * Sets the value of this object. The supplied String is encoded and stored, the previous value is overwritten.
     * @param value The String to encode, may be null
     */
    public void set(String value) {
        encodedData = encode(value);
    }

    /**
     * Decodes the String value from the compressed internal buffer.
     * @return The decoded String, or null if the value is null
     * @throws DataFormatException The data is corrupt
     */
    public String get() throws DataFormatException {
        return decode(encodedData);
    }

    /**
     * Returns the decoded value. If the internal buffer cannot be decoded, the
     * string form of the resulting DataFormatException is returned instead.
     * Note that the result is null when the value is null.
     * @see java.lang.Object#toString()
     */
    @Override
    public String toString() {
        try {
            return get();
        } catch (DataFormatException e) {
            return e.toString();
        }
    }

    /** Deflates the given bytes and returns them prefixed with the DEFLATE_CODED tag byte. */
    private static byte[] deflate(byte[] utf8) {
        ByteArrayOutputStream sink = new ByteArrayOutputStream();
        sink.write(DEFLATE_CODED);
        DeflaterOutputStream deflater = new DeflaterOutputStream(sink);
        try {
            try {
                deflater.write(utf8);
                deflater.finish();
            } finally {
                // close() releases the stream's Deflater even if writing failed.
                deflater.close();
            }
        } catch (IOException e) {
            // Can't normally happen
            throw new RuntimeException("Fatal error in DeflatedString, streams not working", e);
        }
        return sink.toByteArray();
    }

    /** Inflates the payload of a DEFLATE_CODED array (everything after the tag byte). */
    private static byte[] inflate(byte[] data) throws DataFormatException {
        InflaterInputStream inflater =
                new InflaterInputStream(new ByteArrayInputStream(data, 1, data.length - 1));
        ByteArrayOutputStream sink = new ByteArrayOutputStream();
        byte[] buffer = new byte[1024];
        try {
            try {
                int count;
                while ((count = inflater.read(buffer)) > -1) {
                    if (count > 0) {
                        sink.write(buffer, 0, count);
                    }
                }
            } finally {
                // close() releases the stream's Inflater; without it the native
                // memory lingers until the stream is garbage collected.
                inflater.close();
            }
        } catch (IOException e) {
            // DataFormatException has no cause-taking constructor, hence initCause.
            DataFormatException failure = new DataFormatException("Problem with compressed data");
            failure.initCause(e);
            throw failure;
        }
        return sink.toByteArray();
    }
}