All downloads are free. The search and download functionality uses the official Maven repository.

org.fuzzydb.util.DeflatedString Maven / Gradle / Ivy

Go to download

Contains classes not specific to fuzzydb implementation which could be used in any implementation of fuzzy matching, or as general utility classes such as those in the geo package.

The newest version!
/******************************************************************************
 * Copyright (c) 2004-2008 Whirlwind Match Limited. All rights reserved.
 *
 * This is open source software; you can use, redistribute and/or modify
 * it under the terms of the Open Software Licence v 3.0 as published by the 
 * Open Source Initiative.
 *
 * You should have received a copy of the Open Software Licence along with this
 * application. If not, contact the Open Source Initiative (www.opensource.org)
 *****************************************************************************/
package org.fuzzydb.util;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.zip.DataFormatException;
import java.util.zip.DeflaterOutputStream;
import java.util.zip.InflaterInputStream;

/**
 * A compressed String object. The static methods have best performance.
 *
 * <p>Typical savings for 5000 short strings (there is upside if strings are longer):
 * <pre>
 * Normal   (String)          4MB storage
 * Deflated (DeflatedString)  2.4MB  60%   toString() works
 * Encoded  (byte[])          1.8MB  45%   toString and runtime types unavailable
 * </pre>
 *
 * <p>Internal format: the first byte is a coding marker, followed by the payload.
 * <ul>
 *   <li>{@code 0} - payload is the raw UTF-8 bytes of the string</li>
 *   <li>{@code 1} - payload is the deflated UTF-8 bytes of the string</li>
 *   <li>{@code 2} - the empty string, no payload</li>
 * </ul>
 * A {@code null} String is represented by a {@code null} byte array.
 *
 * @author ac
 */
public class DeflatedString {

    /**
     * Strings of this length (in chars) or shorter are never deflated; only strings
     * strictly longer than this are candidates for compression. Performance aid.
     */
    private static final int NO_COMPRESSION_LENGTH = 50;

    private static final byte UTF8_CODED = 0;
    private static final byte DEFLATE_CODED = 1;
    private static final byte EMPTY_CODED = 2;

    private static final String EMPTY_STRING = "";

    /** Charset name used for all String to byte conversions. */
    private static final String CHARSET_NAME = "UTF8";

    /** The value in DeflatedString internal format, or {@code null} when no value is held. */
    byte[] encodedData;

    /** Construct a DeflatedString holding no value ({@link #get()} returns {@code null}). */
    public DeflatedString() {
        encodedData = null;
    }

    /**
     * Construct a new DeflatedString with the specified value.
     *
     * @param value The String to encode
     */
    public DeflatedString(String value) {
        set(value);
    }

    /**
     * Encode the specified string to the DeflatedString format.
     *
     * <p>Strings longer than the no-compression threshold are deflated, but the deflated
     * form is only used when it is smaller than the plain UTF-8 form.
     *
     * @param value The String to encode, may be {@code null}
     * @return A byte array coded with DeflatedString's internal format, or {@code null}
     *         if {@code value} is {@code null}
     */
    public static byte[] encode(String value) {
        if (value == null) {
            return null;
        }
        if (value.length() == 0) {
            return new byte[] { EMPTY_CODED };
        }

        byte[] utf8;
        try {
            utf8 = value.getBytes(CHARSET_NAME);
        } catch (UnsupportedEncodingException e) {
            // Can't normally happen
            throw new RuntimeException("Fatal error in DeflatedString, UTF8 coding not supported", e);
        }

        if (value.length() > NO_COMPRESSION_LENGTH) {
            byte[] deflated = deflate(utf8);
            // Keep the deflated form only if it actually saves space over marker + raw UTF-8.
            if (deflated.length < utf8.length + 1) {
                return deflated;
            }
        }

        byte[] plain = new byte[utf8.length + 1];
        plain[0] = UTF8_CODED;
        System.arraycopy(utf8, 0, plain, 1, utf8.length);
        return plain;
    }

    /** Builds the DEFLATE_CODED form: the marker byte followed by the deflated payload. */
    private static byte[] deflate(byte[] utf8) {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        baos.write(DEFLATE_CODED);
        DeflaterOutputStream d = new DeflaterOutputStream(baos);
        try {
            try {
                d.write(utf8);
                d.finish();
            } finally {
                // Always close so the stream's Deflater is released, even if writing failed.
                d.close();
            }
        } catch (IOException e) {
            // Can't normally happen
            throw new RuntimeException("Fatal error in DeflatedString, streams not working", e);
        }
        return baos.toByteArray();
    }

    /**
     * Get the raw encoded data. The internal array itself is returned, not a copy.
     *
     * @return A byte array coded with DeflatedString's internal format, or {@code null}
     *         if no value is held
     */
    public byte[] getCoded() {
        return encodedData;
    }

    /**
     * Decode the byte array to a String. The byte array must be in DeflatedString format,
     * or else an exception is thrown.
     *
     * @param data The coded data to decode, may be {@code null}
     * @return The decoded String, or {@code null} if {@code data} is {@code null}
     * @throws DataFormatException The data is not in the correct format.
     * @see #getCoded()
     */
    public static String decode(byte[] data) throws DataFormatException {
        if (data == null) {
            return null;
        }
        if (data.length < 1) {
            throw new DataFormatException();
        }

        byte coding = data[0];
        if (coding == EMPTY_CODED) {
            return EMPTY_STRING;
        }
        // Every other coding needs at least one payload byte after the marker.
        if (data.length < 2) {
            throw new DataFormatException();
        }

        try {
            if (coding == UTF8_CODED) {
                return new String(data, 1, data.length - 1, CHARSET_NAME);
            }
            if (coding == DEFLATE_CODED) {
                return new String(inflate(data), CHARSET_NAME);
            }
        } catch (UnsupportedEncodingException e) {
            // Can't normally happen
            throw new RuntimeException("Fatal error in DeflatedString, UTF8 coding not supported", e);
        }
        throw new DataFormatException("Unknown coding type");
    }

    /** Inflates the payload that follows the marker byte of a DEFLATE_CODED array. */
    private static byte[] inflate(byte[] data) throws DataFormatException {
        InflaterInputStream iis =
                new InflaterInputStream(new ByteArrayInputStream(data, 1, data.length - 1));
        try {
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            byte[] tmp = new byte[1024];
            int bytesMoved;
            while ((bytesMoved = iis.read(tmp)) > -1) {
                if (bytesMoved > 0) {
                    baos.write(tmp, 0, bytesMoved);
                }
            }
            return baos.toByteArray();
        } catch (IOException e) {
            // DataFormatException has no cause constructor, so attach the cause explicitly.
            DataFormatException dfe = new DataFormatException("Problem with compressed data");
            dfe.initCause(e);
            throw dfe;
        } finally {
            try {
                // Close so the stream's Inflater is released promptly.
                iis.close();
            } catch (IOException ignored) {
                // Nothing useful to do: the stream is backed by an in-memory array.
            }
        }
    }

    /**
     * Determine if real compression was used to encode the string.
     *
     * @return true if the internal format is deflated, false if it is UTF8, null, or empty coded
     */
    public boolean isCompressed() {
        return encodedData != null
                && encodedData.length > 0
                && encodedData[0] == DEFLATE_CODED;
    }

    /**
     * Sets the value of this object. The supplied String is encoded and stored, the
     * previous value is overwritten.
     *
     * @param value The String to encode
     */
    public void set(String value) {
        encodedData = encode(value);
    }

    /**
     * Decodes the String value from the compressed internal buffer.
     *
     * @return The decoded String, or {@code null} if no value is held
     * @throws DataFormatException The data is corrupt
     */
    public String get() throws DataFormatException {
        return decode(encodedData);
    }

    /**
     * Returns the decoded value. If the internal data cannot be decoded, the string form
     * of the resulting {@link DataFormatException} is returned instead of throwing.
     * Returns {@code null} when no value is held.
     *
     * @see java.lang.Object#toString()
     */
    @Override
    public String toString() {
        try {
            return get();
        } catch (DataFormatException e) {
            return e.toString();
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy