All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.mortbay.util.Utf8StringBuffer Maven / Gradle / Ivy

There is a newer version: 7.0.0.pre5
Show newest version
//========================================================================
//Copyright 2006 Mort Bay Consulting Pty. Ltd.
//------------------------------------------------------------------------
//Licensed under the Apache License, Version 2.0 (the "License");
//you may not use this file except in compliance with the License.
//You may obtain a copy of the License at 
//http://www.apache.org/licenses/LICENSE-2.0
//Unless required by applicable law or agreed to in writing, software
//distributed under the License is distributed on an "AS IS" BASIS,
//WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//See the License for the specific language governing permissions and
//limitations under the License.
//========================================================================

package org.mortbay.util;

/* ------------------------------------------------------------ */
/** UTF-8 StringBuffer.
 *
 * This class wraps a standard {@link java.lang.StringBuffer} and provides methods to append
 * UTF-8 encoded bytes, which are converted into characters.
 *
 * This class is stateful: up to 6 calls to {@link #append(byte)} may be needed before
 * a character is appended to the string buffer.
 *
 * Malformed input never throws. Each malformed sequence is replaced by a single '?' and
 * is reported through {@link #isError()}.
 *
 * The UTF-8 decoding is done by this class and no additional buffers or Readers are used.
 * The UTF-8 code was inspired by http://javolution.org
 *
 */
public class Utf8StringBuffer
{
    /** Decoded characters. */
    StringBuffer _buffer;
    /** Number of continuation bytes still expected for the character being decoded. */
    int _more;
    /** Payload bits accumulated so far for the character being decoded. */
    int _bits;
    /** Set once any malformed sequence has been seen since construction or the last reset. */
    boolean _errors;

    public Utf8StringBuffer()
    {
        _buffer=new StringBuffer();
    }

    /**
     * @param capacity initial capacity passed to the wrapped StringBuffer
     */
    public Utf8StringBuffer(int capacity)
    {
        _buffer=new StringBuffer(capacity);
    }

    /* ------------------------------------------------------------ */
    /** Append a range of UTF-8 encoded bytes.
     * A multi-byte character may be split across successive calls.
     * @param b array holding the encoded bytes
     * @param offset index of the first byte to decode
     * @param length number of bytes to decode
     */
    public void append(byte[] b,int offset, int length)
    {
        int end=offset+length;
        for (int i=offset; i<end; i++)
            append(b[i]);
    }

    /* ------------------------------------------------------------ */
    /** Append a single UTF-8 encoded byte.
     * A character is added to the buffer only once its final byte has been seen.
     * @param b the next byte of the encoded stream
     */
    public void append(byte b)
    {
        if (b>=0)
        {
            // 0xxxxxxx
            if (_more>0)
                // ASCII arrived while continuation bytes were expected:
                // the pending sequence is replaced and the ASCII byte is dropped.
                appendReplacement();
            else
                _buffer.append((char)(0x7f&b));
        }
        else if (_more==0)
        {
            if ((b&0xc0)!=0xc0)
            {
                // 10xxxxxx - continuation byte without a lead byte
                appendReplacement();
            }
            else if ((b & 0xe0) == 0xc0)
            {
                //110xxxxx
                _more=1;
                _bits=b&0x1f;
            }
            else if ((b & 0xf0) == 0xe0)
            {
                //1110xxxx
                _more=2;
                _bits=b&0x0f;
            }
            else if ((b & 0xf8) == 0xf0)
            {
                //11110xxx
                _more=3;
                _bits=b&0x07;
            }
            else if ((b & 0xfc) == 0xf8)
            {
                //111110xx
                _more=4;
                _bits=b&0x03;
            }
            else if ((b & 0xfe) == 0xfc)
            {
                //1111110x
                _more=5;
                _bits=b&0x01;
            }
            else
            {
                // 1111111x - 0xFE and 0xFF are not lead bytes of any sequence
                appendReplacement();
            }
        }
        else
        {
            if ((b&0xc0)==0xc0)
            {
                // 11?????? - a new lead byte arrived before the sequence completed
                appendReplacement();
            }
            else
            {
                // 10xxxxxx
                _bits=(_bits<<6)|(b&0x3f);
                if (--_more==0)
                    appendCodePoint(_bits);
            }
        }
    }

    /* ------------------------------------------------------------ */
    /* Append a decoded code point, using a surrogate pair above U+FFFF. */
    private void appendCodePoint(int codePoint)
    {
        if (codePoint<0x10000)
        {
            _buffer.append((char)codePoint);
        }
        else if (codePoint<=0x10ffff)
        {
            // A plain (char) cast would truncate; split into high/low surrogates instead.
            int supplementary=codePoint-0x10000;
            _buffer.append((char)(0xd800|(supplementary>>10)));
            _buffer.append((char)(0xdc00|(supplementary&0x3ff)));
        }
        else
        {
            // Beyond the Unicode range: cannot be represented in a String.
            appendReplacement();
        }
    }

    /* ------------------------------------------------------------ */
    /* Record a malformed sequence: emit '?', clear decoder state and flag the error. */
    private void appendReplacement()
    {
        _buffer.append('?');
        _more=0;
        _bits=0;
        _errors=true;
    }

    /**
     * @return the number of characters decoded so far
     */
    public int length()
    {
        return _buffer.length();
    }

    /** Discard all decoded characters, any partially decoded character and the error flag. */
    public void reset()
    {
        _buffer.setLength(0);
        _more=0;
        _bits=0;
        _errors=false;
    }

    /**
     * @return the wrapped StringBuffer itself (not a copy)
     */
    public StringBuffer getStringBuffer()
    {
        return _buffer;
    }

    /**
     * @return the characters decoded so far
     */
    public String toString()
    {
        return _buffer.toString();
    }

    /* ------------------------------------------------------------ */
    /**
     * @return True if there are non UTF-8 characters or incomplete UTF-8 characters in the buffer.
     */
    public boolean isError()
    {
        return _errors || _more>0;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy