All downloads are free. The search and download functionality uses the official Maven repository.

com.dyuproject.protostuff.StreamedStringSerializer Maven / Gradle / Ivy

The newest version!
package com.dyuproject.protostuff;

import static com.dyuproject.protostuff.StringSerializer.FIVE_BYTE_LOWER_LIMIT;
import static com.dyuproject.protostuff.StringSerializer.FOUR_BYTE_EXCLUSIVE;
import static com.dyuproject.protostuff.StringSerializer.FOUR_BYTE_LOWER_LIMIT;
import static com.dyuproject.protostuff.StringSerializer.INT_MIN_VALUE;
import static com.dyuproject.protostuff.StringSerializer.LONG_MIN_VALUE;
import static com.dyuproject.protostuff.StringSerializer.ONE_BYTE_EXCLUSIVE;
import static com.dyuproject.protostuff.StringSerializer.THREE_BYTE_EXCLUSIVE;
import static com.dyuproject.protostuff.StringSerializer.THREE_BYTE_LOWER_LIMIT;
import static com.dyuproject.protostuff.StringSerializer.TWO_BYTE_EXCLUSIVE;
import static com.dyuproject.protostuff.StringSerializer.TWO_BYTE_LOWER_LIMIT;
import static com.dyuproject.protostuff.StringSerializer.putBytesFromInt;
import static com.dyuproject.protostuff.StringSerializer.putBytesFromLong;
import static com.dyuproject.protostuff.StringSerializer.writeFixed2ByteInt;

import java.io.IOException;

/**
 * UTF-8 String serialization
 *
 * @author David Yu
 * @created Feb 4, 2010
 */
public final class StreamedStringSerializer
{
    
    // Every member of this final class is static; the private constructor keeps 
    // code outside the class from creating instances.
    private StreamedStringSerializer() {}
    
    /**
     * Appends the decimal text form of {@code value} to the {@link LinkedBuffer}.
     * <p>
     * {@code session.size} grows by the number of bytes written. If the text does 
     * not fit between {@code lb.offset} and the end of {@code lb.buffer}, the pending 
     * bytes are passed to {@code session.flush(...)} first and writing continues at 
     * the offset that call returns.
     *
     * @return the same {@code lb} that was passed in
     */
    public static LinkedBuffer writeInt(final int value, final WriteSession session, 
            LinkedBuffer lb) throws IOException
    {
        // Integer.MIN_VALUE cannot be negated, so it is copied from a constant instead.
        final boolean isMin = value == Integer.MIN_VALUE;
        
        final int byteCount;
        if(isMin)
            byteCount = INT_MIN_VALUE.length;
        else if(value < 0)
            byteCount = StringSerializer.stringSize(-value) + 1; // +1 for the sign
        else
            byteCount = StringSerializer.stringSize(value);
        
        session.size += byteCount;
        
        if(lb.offset + byteCount > lb.buffer.length)
        {
            // not enough room left: hand over what is pending and reuse the buffer
            lb.offset = session.flush(lb.buffer, lb.start, lb.offset - lb.start);
        }
        
        if(isMin)
            System.arraycopy(INT_MIN_VALUE, 0, lb.buffer, lb.offset, byteCount);
        else
            putBytesFromInt(value, lb.offset, byteCount, lb.buffer);
        
        lb.offset += byteCount;
        
        return lb;
    }
    
    /**
     * Appends the decimal text form of {@code value} to the {@link LinkedBuffer}.
     * <p>
     * {@code session.size} grows by the number of bytes written. If the text does 
     * not fit between {@code lb.offset} and the end of {@code lb.buffer}, the pending 
     * bytes are passed to {@code session.flush(...)} first and writing continues at 
     * the offset that call returns.
     *
     * @return the same {@code lb} that was passed in
     */
    public static LinkedBuffer writeLong(final long value, final WriteSession session, 
            LinkedBuffer lb) throws IOException
    {
        // Long.MIN_VALUE cannot be negated, so it is copied from a constant instead.
        final boolean isMin = value == Long.MIN_VALUE;
        
        final int byteCount;
        if(isMin)
            byteCount = LONG_MIN_VALUE.length;
        else if(value < 0)
            byteCount = StringSerializer.stringSize(-value) + 1; // +1 for the sign
        else
            byteCount = StringSerializer.stringSize(value);
        
        session.size += byteCount;
        
        if(lb.offset + byteCount > lb.buffer.length)
        {
            //TODO space efficiency (slower path)
            // not enough room left: hand over what is pending and reuse the buffer
            lb.offset = session.flush(lb.buffer, lb.start, lb.offset - lb.start);
        }
        
        if(isMin)
            System.arraycopy(LONG_MIN_VALUE, 0, lb.buffer, lb.offset, byteCount);
        else
            putBytesFromLong(value, lb.offset, byteCount, lb.buffer);
        
        lb.offset += byteCount;
        
        return lb;
    }
    
    /**
     * Appends the text produced by {@link Float#toString(float)} for {@code value} 
     * to the {@link LinkedBuffer}, delegating the byte copy to 
     * {@link #writeAscii(String, WriteSession, LinkedBuffer)}.
     * TODO - skip string conversion and write directly to buffer
     *
     * @return whatever {@code writeAscii} returns for {@code lb}
     */
    public static LinkedBuffer writeFloat(final float value, final WriteSession session, 
            final LinkedBuffer lb) throws IOException
    {
        final String text = Float.toString(value);
        return writeAscii(text, session, lb);
    }
    
    /**
     * Appends the text produced by {@link Double#toString(double)} for {@code value} 
     * to the {@link LinkedBuffer}, delegating the byte copy to 
     * {@link #writeAscii(String, WriteSession, LinkedBuffer)}.
     * TODO - skip string conversion and write directly to buffer
     *
     * @return whatever {@code writeAscii} returns for {@code lb}
     */
    public static LinkedBuffer writeDouble(final double value, final WriteSession session, 
            final LinkedBuffer lb) throws IOException
    {
        final String text = Double.toString(value);
        return writeAscii(text, session, lb);
    }
    
    /**
     * Encodes {@code str} as UTF-8 directly into the {@link LinkedBuffer}, flushing 
     * through the session whenever the next encoded char would not fit.
     * <p>
     * A high surrogate immediately followed by a low surrogate is emitted as one 
     * 4-byte sequence. Any other char at or above {@code 0x0800} (an unpaired 
     * surrogate included) is emitted as a 3-byte sequence.
     * <p>
     * {@code session.size} is advanced by the number of bytes produced.
     *
     * @return the same {@code lb} that was passed in
     */
    public static LinkedBuffer writeUTF8(final String str, final WriteSession session, 
            final LinkedBuffer lb) throws IOException
    {
        final int len = str.length();
        if(len == 0)
            return lb;
        
        final byte[] buffer = lb.buffer;
        final int limit = buffer.length;
        int offset = lb.offset;
        int i = 0;
        
        // len is known to be positive, so this runs at least once
        while(i < len)
        {
            final char c = str.charAt(i++);
            
            if(c < 0x0080)
            {
                // ascii: one byte
                if(offset == limit)
                {
                    // account for the bytes written since lb.offset was last synced
                    session.size += offset - lb.offset;
                    offset = session.flush(buffer, lb.start, offset - lb.start);
                    lb.offset = offset;
                }
                
                buffer[offset++] = (byte)c;
            }
            else if(c < 0x0800)
            {
                // two bytes
                if(offset + 2 > limit)
                {
                    session.size += offset - lb.offset;
                    offset = session.flush(buffer, lb.start, offset - lb.start);
                    lb.offset = offset;
                }
                
                buffer[offset++] = (byte)(0xC0 | ((c >> 6) & 0x1F));
                buffer[offset++] = (byte)(0x80 | (c & 0x3F));
            }
            else if(i != len 
                    && Character.isHighSurrogate(c) 
                    && Character.isLowSurrogate(str.charAt(i)))
            {
                // surrogate pair: four bytes for the combined code point
                if(offset + 4 > limit)
                {
                    session.size += offset - lb.offset;
                    offset = session.flush(buffer, lb.start, offset - lb.start);
                    lb.offset = offset;
                }
                
                // consumes the low surrogate as well
                final int codePoint = Character.toCodePoint(c, str.charAt(i++));
                buffer[offset++] = (byte)(0xF0 | ((codePoint >> 18) & 0x07));
                buffer[offset++] = (byte)(0x80 | ((codePoint >> 12) & 0x3F));
                buffer[offset++] = (byte)(0x80 | ((codePoint >> 6) & 0x3F));
                buffer[offset++] = (byte)(0x80 | (codePoint & 0x3F));
            }
            else
            {
                // three bytes
                if(offset + 3 > limit)
                {
                    session.size += offset - lb.offset;
                    offset = session.flush(buffer, lb.start, offset - lb.start);
                    lb.offset = offset;
                }
                
                buffer[offset++] = (byte)(0xE0 | ((c >> 12) & 0x0F));
                buffer[offset++] = (byte)(0x80 | ((c >> 6) & 0x3F));
                buffer[offset++] = (byte)(0x80 | (c & 0x3F));
            }
        }
        
        // account for whatever was written after the last sync
        session.size += offset - lb.offset;
        lb.offset = offset;
        
        return lb;
    }
    
    /**
     * Copies the chars of {@code str} into the {@link LinkedBuffer}, one byte per 
     * char (each char is narrowed with a plain {@code (byte)} cast).
     * The caller must already know that the string is 100% ascii - e.g. the text 
     * form of a double/float only ever contains ascii chars.
     * <p>
     * {@code session.size} grows by {@code str.length()}. When the string does not 
     * fit in the remaining space, the buffer is filled, flushed through the session 
     * and reused until every char has been written.
     *
     * @return the same {@code lb} that was passed in
     */
    public static LinkedBuffer writeAscii(final String str, final WriteSession session, 
            final LinkedBuffer lb) throws IOException
    {
        final int len = str.length();
        if(len == 0)
            return lb;
        
        final byte[] buffer = lb.buffer;
        final int limit = buffer.length;
        int offset = lb.offset;
        
        // actual size
        session.size += len;
        
        if(offset + len <= limit)
        {
            // fast path: everything fits in the space that is left
            for(int i = 0; i < len; i++)
                buffer[offset + i] = (byte)str.charAt(i);
            
            lb.offset = offset + len;
            return lb;
        }
        
        // slow path: fill up, flush, then keep reusing the buffer
        final int start = lb.start;
        final int bufSize = limit - start;
        final int head = limit - offset;
        int index = 0;
        
        // use up the space that is still available
        for(int n = head; n > 0; n--)
            buffer[offset++] = (byte)str.charAt(index++);
        
        // the buffer is full now: flush all of it and continue where flush says
        offset = session.flush(buffer, start, bufSize);
        
        for(int n = len - head; n > 0; n--)
        {
            if(offset == limit)
                offset = session.flush(buffer, start, bufSize);
            
            buffer[offset++] = (byte)str.charAt(index++);
        }
        
        lb.offset = offset;
        
        return lb;
    }
    
    /**
     * Walks the chain that begins at {@code node} (following {@code next}) and, for 
     * each buffer holding bytes between {@code start} and {@code offset}, flushes 
     * them through the session and stores the offset returned by the flush.
     * {@code node} itself must not be null.
     */
    private static void flushAndReset(LinkedBuffer node, final WriteSession session) 
    throws IOException
    {
        LinkedBuffer current = node;
        do
        {
            final int pending = current.offset - current.start;
            if(pending > 0)
            {
                current.offset = session.flush(current, current.buffer, 
                        current.start, pending);
            }
            
            current = current.next;
        }
        while(current != null);
    }
    
    /**
     * Writes the utf8 bytes of {@code str} preceded by their length as a fixed 
     * 2-byte integer in big endian order, i.e. the same kind of length prefix that 
     * {@link java.io.DataOutputStream#writeUTF(String)} emits.
     * <p>
     * Shorthand for the four-argument overload with {@code littleEndian == false}.
     */
    public static LinkedBuffer writeUTF8FixedDelimited(final String str, 
            final WriteSession session, 
            LinkedBuffer lb) throws IOException
    {
        final boolean littleEndian = false;
        return writeUTF8FixedDelimited(str, littleEndian, session, lb);
    }
    
    /**
     * Writes the utf8 bytes of {@code str} preceded by their length as a fixed 
     * 2-byte integer.
     * <p>
     * Two bytes are reserved at the current offset, the string is encoded after 
     * them, and the reserved slot is then filled with the number of bytes by which 
     * {@code session.size} grew during encoding. Finally {@code session.size} is 
     * increased by 2 for the prefix itself.
     *
     * @param str the string to write; an empty string produces just a zero prefix
     * @param littleEndian forwarded to {@code writeFixed2ByteInt}; presumably selects 
     *        the byte order of the 2-byte prefix
     * @param session supplies {@code size} accounting and the flush operations
     * @param lb the buffer to write into
     * @return the same {@code lb} that was passed in
     */
    public static LinkedBuffer writeUTF8FixedDelimited(final String str, 
            final boolean littleEndian, final WriteSession session, 
            final LinkedBuffer lb) throws IOException
    {
        // lastSize: snapshot used later to work out how many bytes the string took.
        // withIntOffset: where the string bytes start, right after the 2-byte prefix.
        int lastSize = session.size, 
            len = str.length(), 
            withIntOffset = lb.offset + 2;
        
        // the buffer could very well be almost-full.
        // (len counts chars, so this is only a lower bound on the encoded size)
        if(withIntOffset + len > lb.buffer.length)
        {
            // flush what we have.
            lb.offset = session.flush(lb.buffer, lb.start, lb.offset - lb.start);
            withIntOffset = lb.offset + 2;
            
            if(len == 0)
            {
                // empty string: only the zero-valued prefix is written
                writeFixed2ByteInt(0, lb.buffer, withIntOffset-2, littleEndian);
                lb.offset = withIntOffset;
                // update size
                session.size += 2;
                return lb;
            }
            
            // if true, the string is too large to fit in the buffer
            if(withIntOffset + len > lb.buffer.length)
            {
                lb.offset = withIntOffset;
                
                // slow path
                final LinkedBuffer rb = StringSerializer.writeUTF8(str, 0, len, 
                        lb.buffer, withIntOffset, lb.buffer.length, session, lb);
                
                // back-fill the reserved prefix with the growth of session.size
                // NOTE(review): relies on StringSerializer.writeUTF8 adding the 
                // encoded byte count to session.size - confirm
                writeFixed2ByteInt((session.size - lastSize), lb.buffer, 
                        withIntOffset-2, littleEndian);
                
                // update size
                session.size += 2;
                
                // the string did not fit in lb, so the writer is expected to have 
                // returned a different (presumably chained) buffer
                assert rb != lb;
                // flush and reset nodes
                flushAndReset(lb, session);
                
                return lb;
            }
        }
        else if(len == 0)
        {
            // empty string with room to spare: only the zero-valued prefix is written
            writeFixed2ByteInt(0, lb.buffer, withIntOffset-2, littleEndian);
            lb.offset = withIntOffset;
            // update size
            session.size += 2;
            return lb;
        }

        // everything fits
        // (by char count; rb is still compared against lb below)
        lb.offset = withIntOffset;
        
        final LinkedBuffer rb = StringSerializer.writeUTF8(str, 0, len, session, lb);
        
        // back-fill the reserved prefix with the growth of session.size
        writeFixed2ByteInt((session.size - lastSize), lb.buffer, 
                withIntOffset-2, littleEndian);
        
        // update size
        session.size += 2;
        
        if(rb != lb)
        {
            // the writer returned a different buffer than lb: 
            // flush and reset nodes
            flushAndReset(lb, session);
        }
        
        return lb;
    }
    
    /**
     * Writes the utf8 bytes of {@code str} (as encoded by 
     * {@code StringSerializer.writeUTF8} for {@code index}/{@code len}) preceded by 
     * a single length byte.
     * <p>
     * One byte is reserved at the current offset, the string is encoded after it, 
     * and the reserved byte is then set to the amount by which {@code session.size} 
     * grew during encoding. {@code session.size} is then incremented once more for 
     * the prefix.
     *
     * @return the same {@code lb} that was passed in
     */
    private static LinkedBuffer writeUTF8OneByteDelimited(final String str, final int index, 
            final int len, final WriteSession session, 
            final LinkedBuffer lb) throws IOException
    {
        final int sizeBefore = session.size;
        
        // the buffer could very well be almost-full: prefix + len chars must fit
        if(lb.offset + 1 + len > lb.buffer.length)
        {
            // flush what we have and continue at the offset flush hands back
            lb.offset = session.flush(lb.buffer, lb.start, lb.offset - lb.start);
        }
        
        // reserve the slot for the length byte
        final int prefixAt = lb.offset;
        lb.offset = prefixAt + 1;
        
        final LinkedBuffer tail = StringSerializer.writeUTF8(str, index, len, session, lb);
        
        // back-fill the length now that the encoded size is known
        lb.buffer[prefixAt] = (byte)(session.size - sizeBefore);
        
        // count the prefix byte itself
        session.size++;
        
        if(tail != lb)
        {
            // a different buffer came back from the writer: flush and reset nodes
            flushAndReset(lb, session);
        }
        
        return lb;
    }
    
    /**
     * Writes the utf8 bytes of {@code str} preceded by their length as a variable 
     * int whose width is only known after encoding.
     * <p>
     * {@code expectedSize} bytes are reserved for the prefix before the string is 
     * encoded. If the encoded size turns out to be smaller than {@code lowerLimit}, 
     * the prefix needs one byte fewer than reserved; the unused byte is then either 
     * skipped (the prefix is written one slot to the right and the flush starts 
     * there) or removed by shifting the string bytes one slot to the left.
     *
     * @param str the string to write
     * @param index forwarded to {@code StringSerializer.writeUTF8}
     * @param len forwarded to {@code StringSerializer.writeUTF8}; also used here as 
     *        the lower bound when checking whether the string fits in the buffer
     * @param lowerLimit encoded sizes below this value use {@code expectedSize - 1} 
     *        prefix bytes
     * @param expectedSize number of bytes reserved up front for the varint prefix
     * @param session supplies {@code size} accounting and the flush operations
     * @param lb the buffer to write into
     * @return the same {@code lb} that was passed in
     */
    private static LinkedBuffer writeUTF8VarDelimited(final String str, final int index, 
            final int len, final int lowerLimit, int expectedSize,
            final WriteSession session, final LinkedBuffer lb)
            throws IOException
    {
        // lastSize: snapshot used later to work out how many bytes the string took.
        // offset: where the varint prefix starts.
        // withIntOffset: where the string bytes start, right after the reserved prefix.
        int lastSize = session.size,
            offset = lb.offset, 
            withIntOffset = offset + expectedSize;
        
        // the buffer could very well be almost-full.
        if(withIntOffset + len > lb.buffer.length)
        {
            // flush what we have.
            // (lb.offset itself is not assigned here; it is set further below)
            offset = session.flush(lb.buffer, lb.start, lb.offset - lb.start);
            withIntOffset = offset + expectedSize;
            
            // if true, the string is too large to fit in the buffer
            if(withIntOffset + len > lb.buffer.length)
            {
                // not enough space for the string.
                lb.offset = withIntOffset;
                
                // slow path
                final LinkedBuffer rb = StringSerializer.writeUTF8(str, index, len, 
                        lb.buffer, withIntOffset, lb.buffer.length, session, lb);
                
                // number of bytes the encoded string added to session.size
                int size = session.size - lastSize;
                
                if(size < lowerLimit)
                {
                    // the prefix is one byte shorter than what was reserved
                    session.size += (--expectedSize);
                    
                    // we've nothing existing to flush
                    // move one slot to the right
                    int o = ++offset;
                    
                    // emit 7 bits per byte with the continuation bit set ...
                    for (;--expectedSize > 0; size >>>= 7)
                        lb.buffer[o++] = (byte)((size & 0x7F) | 0x80);
                    
                    // ... and the final byte without it
                    lb.buffer[o] = (byte)(size);
                    
                    // flush and reset
                    // (starts at the shifted prefix, skipping the unused slot)
                    lb.offset = session.flush(lb, lb.buffer, offset, 
                            lb.offset - offset);
                    
                    assert rb != lb;
                    // flush and reset nodes
                    // (lb itself was flushed just above, so start at its successor)
                    flushAndReset(lb.next, session);
                    
                    return lb;
                }

                // update size
                session.size += expectedSize;
                
                // the reserved width was right: write the varint in place
                for (;--expectedSize > 0; size >>>= 7)
                    lb.buffer[offset++] = (byte)((size & 0x7F) | 0x80);
                
                lb.buffer[offset] = (byte)(size);
                
                assert rb != lb;
                // flush and reset nodes
                flushAndReset(lb, session);
                
                return lb;
            }
        }

        // everything fits
        // (by char count; rb is still compared against lb below)
        lb.offset = withIntOffset;
        
        final LinkedBuffer rb = StringSerializer.writeUTF8(str, index, len, session, lb);
        
        // number of bytes the encoded string added to session.size
        int size = session.size - lastSize;
        
        if(size < lowerLimit)
        {
            // if the buffer was fully used
            // or if the string was at least 683 bytes
            // for this method, expected size only either be 2/3/4/5
            if(rb != lb || expectedSize != 2) 
            {
                // flush it
                session.size += (--expectedSize);
                
                // move one slot to the right
                // (existingOffset remembers where the reserved prefix began)
                int existingOffset = offset, o = ++offset;
                
                for (;--expectedSize > 0; size >>>= 7)
                    lb.buffer[o++] = (byte)((size & 0x7F) | 0x80);
                
                lb.buffer[o] = (byte)(size);
                
                if(existingOffset == lb.start)
                {
                    // nothing was written prior to this string
                    // flush and reset
                    lb.offset = session.flush(lb, lb.buffer, offset, lb.offset - offset);
                }
                else
                {
                    // flush and reset
                    // two ranges are passed: the earlier content up to existingOffset, 
                    // then the shifted prefix plus string - the unused slot at 
                    // existingOffset is in neither range
                    lb.offset = session.flush(lb.buffer, lb.start, existingOffset - lb.start, 
                            lb.buffer, offset, lb.offset - offset);
                }
                
                if(rb != lb)
                {
                    // flush and reset nodes
                    // (lb itself was flushed just above, so start at its successor)
                    flushAndReset(lb.next, session);
                }
                
                return lb;
            }
            
            // move one slot to the left
            // (reached only when rb == lb and expectedSize == 2: the string bytes are 
            // shifted in place so that a single prefix byte precedes them)
            System.arraycopy(lb.buffer, withIntOffset, lb.buffer, withIntOffset - 1, 
                    lb.offset - withIntOffset);
            
            expectedSize--;
            lb.offset--;
        }

        // update size
        session.size += expectedSize;
        
        // write the varint: 7 bits per byte, continuation bit on all but the last
        for (;--expectedSize > 0; size >>>= 7)
            lb.buffer[offset++] = (byte)((size & 0x7F) | 0x80);
        
        lb.buffer[offset] = (byte)(size);
        
        if(rb != lb)
        {
            // flush and reset nodes
            flushAndReset(lb, session);
        }
        
        return lb;
    }
    
    /**
     * Writes the utf8 bytes of {@code str} preceded by their length as a variable 
     * int (1 to 5 bytes).
     * <p>
     * An empty string is written as a single zero byte. Otherwise the char count 
     * selects how many prefix bytes to reserve, and the actual work is delegated to 
     * the private helpers.
     *
     * @return the same {@code lb} that was passed in
     */
    public static LinkedBuffer writeUTF8VarDelimited(final String str, final WriteSession session, 
            final LinkedBuffer lb) throws IOException
    {
        final int len = str.length();
        
        if(len == 0)
        {
            if(lb.offset == lb.buffer.length)
            {
                // buffer full: flush to make room for the single byte
                lb.offset = session.flush(lb.buffer, lb.start, lb.offset - lb.start);
            }
            
            // a zero length, and nothing after it
            lb.buffer[lb.offset++] = 0;
            session.size++;
            return lb;
        }
        
        if(len < ONE_BYTE_EXCLUSIVE)
        {
            // the varint will be max 1-byte. (even if all chars are non-ascii)
            return writeUTF8OneByteDelimited(str, 0, len, session, lb);
        }
        
        // For each tier below, the varint is at most expectedSize bytes and could be 
        // one byte less (even if all chars are non-ascii).
        final int lowerLimit;
        final int expectedSize;
        if(len < TWO_BYTE_EXCLUSIVE)
        {
            lowerLimit = TWO_BYTE_LOWER_LIMIT;
            expectedSize = 2;
        }
        else if(len < THREE_BYTE_EXCLUSIVE)
        {
            lowerLimit = THREE_BYTE_LOWER_LIMIT;
            expectedSize = 3;
        }
        else if(len < FOUR_BYTE_EXCLUSIVE)
        {
            lowerLimit = FOUR_BYTE_LOWER_LIMIT;
            expectedSize = 4;
        }
        else
        {
            lowerLimit = FIVE_BYTE_LOWER_LIMIT;
            expectedSize = 5;
        }
        
        return writeUTF8VarDelimited(str, 0, len, lowerLimit, expectedSize, session, lb);
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy