org.eclipse.jetty.io.WriteThroughWriter Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of jetty-io Show documentation
There is a newer version: 12.1.0.alpha0
//
// ========================================================================
// Copyright (c) 1995 Mort Bay Consulting Pty Ltd and others.
//
// This program and the accompanying materials are made available under the
// terms of the Eclipse Public License v. 2.0 which is available at
// https://www.eclipse.org/legal/epl-2.0, or the Apache License, Version 2.0
// which is available at https://www.apache.org/licenses/LICENSE-2.0.
//
// SPDX-License-Identifier: EPL-2.0 OR Apache-2.0
// ========================================================================
//

package org.eclipse.jetty.io;

import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Objects;

import org.eclipse.jetty.util.ByteArrayOutputStream2;

/**
 * An alternative to {@link java.io.OutputStreamWriter} that provides
 * optimized implementations for well known {@link Charset}s,
 * specifically {@link StandardCharsets#UTF_8} and {@link StandardCharsets#ISO_8859_1}.
 * <p>
 * The implementations of this class never buffer characters or bytes beyond a call to the
 * {@link #write(char[], int, int)} method, thus written characters are always passed
 * as bytes to the wrapped {@link OutputStream} before the call returns.  The one exception
 * is the UTF-8 implementation, which holds back a high surrogate that ends a write until
 * the following character arrives.
 * <p>
 * Instances reuse a single scratch byte buffer and perform no locking of their own,
 * so an instance must not be used by more than one thread at a time.
 */
public abstract class WriteThroughWriter extends Writer
{
    static final int DEFAULT_MAX_WRITE_SIZE = 1024;
    private final int _maxWriteSize;
    final OutputStream _out;
    // Scratch buffer that each conversion fills and then drains into _out.
    final ByteArrayOutputStream2 _bytes;

    /**
     * Construct a {@link WriteThroughWriter} using {@link #DEFAULT_MAX_WRITE_SIZE}.
     * @param out The {@link OutputStream} to write the converted bytes to.
     */
    protected WriteThroughWriter(OutputStream out)
    {
        this(out, 0);
    }

    /**
     * Construct a {@link WriteThroughWriter}.
     * @param out The {@link OutputStream} to write the converted bytes to.
     * @param maxWriteSize The maximum size in characters of a single conversion;
     * a value less than or equal to zero selects {@link #DEFAULT_MAX_WRITE_SIZE}.
     */
    protected WriteThroughWriter(OutputStream out, int maxWriteSize)
    {
        _maxWriteSize = maxWriteSize <= 0 ? DEFAULT_MAX_WRITE_SIZE : maxWriteSize;
        _out = out;
        _bytes = new ByteArrayOutputStream2(_maxWriteSize);
    }

    /**
     * Obtain a new {@link Writer} that converts characters written to bytes
     * written to an {@link OutputStream}.
     * <p>
     * Only the canonical names {@code ISO-8859-1} and {@code UTF-8} (compared ignoring case)
     * select the optimized implementations; any other name, including aliases of those
     * charsets, is handled by a general purpose {@link OutputStreamWriter} based implementation.
     * @param outputStream The {@link OutputStream} to write to.
     * @param charset The {@link Charset} name.
     * @return A Writer that will encode characters with the named charset and pass the
     * resulting bytes straight through to {@code outputStream}.
     * @throws IOException If there is a problem creating the {@link Writer}.
     */
    public static WriteThroughWriter newWriter(OutputStream outputStream, String charset)
        throws IOException
    {
        if (StandardCharsets.ISO_8859_1.name().equalsIgnoreCase(charset))
            return new Iso88591Writer(outputStream);
        if (StandardCharsets.UTF_8.name().equalsIgnoreCase(charset))
            return new Utf8Writer(outputStream);
        return new EncodingWriter(outputStream, charset);
    }

    /**
     * Obtain a new {@link Writer} that converts characters written to bytes
     * written to an {@link OutputStream}.
     * @param outputStream The {@link OutputStream} to write to.
     * @param charset The {@link Charset}.
     * @return A Writer that will encode characters with the given charset and pass the
     * resulting bytes straight through to {@code outputStream}.
     * @throws IOException If there is a problem creating the {@link Writer}.
     */
    public static WriteThroughWriter newWriter(OutputStream outputStream, Charset charset)
        throws IOException
    {
        // Compare by equality (not identity) for both well known charsets.
        if (StandardCharsets.ISO_8859_1.equals(charset))
            return new Iso88591Writer(outputStream);
        if (StandardCharsets.UTF_8.equals(charset))
            return new Utf8Writer(outputStream);
        return new EncodingWriter(outputStream, charset);
    }

    /**
     * @return The maximum number of characters converted in a single step.
     */
    public int getMaxWriteSize()
    {
        return _maxWriteSize;
    }

    /**
     * Closes the wrapped {@link OutputStream}.
     */
    @Override
    public void close() throws IOException
    {
        _out.close();
    }

    /**
     * Flushes the wrapped {@link OutputStream}.
     */
    @Override
    public void flush() throws IOException
    {
        _out.flush();
    }

    /**
     * Convert the passed characters to bytes and write them to the wrapped {@link OutputStream}.
     * The {@code write} methods of this class only ever pass sequences of at most
     * {@link #getMaxWriteSize()} characters to this method.
     * @param sequence The characters to convert and write.
     * @return this writer
     * @throws IOException If the wrapped stream fails.
     */
    @Override
    public abstract WriteThroughWriter append(CharSequence sequence) throws IOException;

    /**
     * Writes a portion of a string, converting it in slices of at most
     * {@link #getMaxWriteSize()} characters without copying the string.
     */
    @Override
    public void write(String string, int offset, int length) throws IOException
    {
        while (length > _maxWriteSize)
        {
            append(subSequence(string, offset, _maxWriteSize));
            offset += _maxWriteSize;
            length -= _maxWriteSize;
        }

        append(subSequence(string, offset, length));
    }

    /**
     * Writes a portion of a character array, converting it in slices of at most
     * {@link #getMaxWriteSize()} characters without copying the array.
     */
    @Override
    public void write(char[] chars, int offset, int length) throws IOException
    {
        while (length > _maxWriteSize)
        {
            append(subSequence(chars, offset, _maxWriteSize));
            offset += _maxWriteSize;
            length -= _maxWriteSize;
        }

        append(subSequence(chars, offset, length));
    }

    /**
     * An implementation of {@link WriteThroughWriter} for
     * optimal ISO-8859-1 conversion.
     * The ISO-8859-1 encoding is done by this class and no additional
     * buffers or Writers are used.  Characters that cannot be represented
     * (code 256 and above) are written as {@code '?'}.
     */
    private static class Iso88591Writer extends WriteThroughWriter
    {
        private Iso88591Writer(OutputStream out)
        {
            super(out);
        }

        @Override
        public WriteThroughWriter append(CharSequence charSequence) throws IOException
        {
            assert charSequence.length() <= getMaxWriteSize();

            int length = charSequence.length();
            if (length == 1)
            {
                // Single character: skip the scratch buffer entirely.
                int c = charSequence.charAt(0);
                _out.write(c < 256 ? c : '?');
                return this;
            }

            // Encode in slices no larger than the scratch buffer, so that a direct call with
            // a sequence longer than the buffer cannot overrun it when assertions are disabled.
            // The do/while keeps the (zero length) write to the stream for an empty sequence.
            // The buffer is allocated with getMaxWriteSize() (>= 1) bytes, so each pass makes progress.
            int offset = 0;
            do
            {
                _bytes.reset();
                byte[] buffer = _bytes.getBuf();
                int chunk = Math.min(length - offset, buffer.length);
                for (int i = 0; i < chunk; i++)
                {
                    int c = charSequence.charAt(offset + i);
                    buffer[i] = (byte)(c < 256 ? c : '?');
                }
                _bytes.setCount(chunk);
                _bytes.writeTo(_out);
                offset += chunk;
            }
            while (offset < length);
            return this;
        }
    }

    /**
     * An implementation of {@link WriteThroughWriter} for
     * an optimal UTF-8 conversion.
     * The UTF-8 encoding is done by this class and no additional
     * buffers or Writers are used.
     * <p>
     * A high surrogate is remembered (across calls if necessary) until the next character
     * is seen: followed by a low surrogate the pair is encoded as a single 4 byte sequence,
     * otherwise the lone high surrogate is encoded on its own as a 3 byte sequence.
     * A low surrogate with no pending high surrogate is likewise encoded on its own.
     */
    private static class Utf8Writer extends WriteThroughWriter
    {
        // Pending high surrogate awaiting its low surrogate, or 0 if none.
        private int _surrogate = 0;

        private Utf8Writer(OutputStream out)
        {
            super(out);
        }

        @Override
        public WriteThroughWriter append(CharSequence charSequence) throws IOException
        {
            assert charSequence.length() <= getMaxWriteSize();
            int length = charSequence.length();
            int offset = 0;
            // Each pass fills the scratch buffer with as many encoded characters as fit,
            // writes it out, then resumes from the first character that was not consumed.
            while (length > 0)
            {
                _bytes.reset();
                int chars = Math.min(length, getMaxWriteSize());

                byte[] buffer = _bytes.getBuf();
                int bytes = _bytes.getCount();

                if (bytes + chars > buffer.length)
                    chars = buffer.length - bytes;

                for (int i = 0; i < chars; i++)
                {
                    int code = charSequence.charAt(offset + i);

                    // Do we already have a surrogate?
                    if (_surrogate == 0)
                    {
                        // No - is this char code a surrogate?
                        if (Character.isHighSurrogate((char)code))
                        {
                            _surrogate = code; // UCS-?
                            continue;
                        }
                    }
                    // else handle a low surrogate
                    else if (Character.isLowSurrogate((char)code))
                    {
                        code = Character.toCodePoint((char)_surrogate, (char)code); // UCS-4
                    }
                    // else UCS-2
                    else
                    {
                        // Unpaired high surrogate: emit it alone and step back so that the
                        // current character is processed again on the next loop iteration.
                        code = _surrogate; // UCS-2
                        _surrogate = 0; // USED
                        i--;
                    }

                    // In every branch below, running out of room sets chars to the number of
                    // characters consumed so far and breaks out to write the buffer.
                    if ((code & 0xffffff80) == 0)
                    {
                        // 1b
                        if (bytes >= buffer.length)
                        {
                            chars = i;
                            break;
                        }
                        buffer[bytes++] = (byte)(code);
                    }
                    else
                    {
                        if ((code & 0xfffff800) == 0)
                        {
                            // 2b
                            if (bytes + 2 > buffer.length)
                            {
                                chars = i;
                                break;
                            }
                            buffer[bytes++] = (byte)(0xc0 | (code >> 6));
                            buffer[bytes++] = (byte)(0x80 | (code & 0x3f));
                        }
                        else if ((code & 0xffff0000) == 0)
                        {
                            // 3b
                            if (bytes + 3 > buffer.length)
                            {
                                chars = i;
                                break;
                            }
                            buffer[bytes++] = (byte)(0xe0 | (code >> 12));
                            buffer[bytes++] = (byte)(0x80 | ((code >> 6) & 0x3f));
                            buffer[bytes++] = (byte)(0x80 | (code & 0x3f));
                        }
                        else if ((code & 0xffe00000) == 0)
                        {
                            // 4b (up to 21 bits); the largest value that can be produced
                            // from a surrogate pair is 0x10FFFF, so the 5b/6b branches
                            // below are never taken for characters read from a CharSequence.
                            if (bytes + 4 > buffer.length)
                            {
                                chars = i;
                                break;
                            }
                            buffer[bytes++] = (byte)(0xf0 | (code >> 18));
                            buffer[bytes++] = (byte)(0x80 | ((code >> 12) & 0x3f));
                            buffer[bytes++] = (byte)(0x80 | ((code >> 6) & 0x3f));
                            buffer[bytes++] = (byte)(0x80 | (code & 0x3f));
                        }
                        else if ((code & 0xfc000000) == 0)
                        {
                            // 5b (up to 26 bits)
                            if (bytes + 5 > buffer.length)
                            {
                                chars = i;
                                break;
                            }
                            buffer[bytes++] = (byte)(0xf8 | (code >> 24));
                            buffer[bytes++] = (byte)(0x80 | ((code >> 18) & 0x3f));
                            buffer[bytes++] = (byte)(0x80 | ((code >> 12) & 0x3f));
                            buffer[bytes++] = (byte)(0x80 | ((code >> 6) & 0x3f));
                            buffer[bytes++] = (byte)(0x80 | (code & 0x3f));
                        }
                        else if ((code & 0x80000000) == 0)
                        {
                            // 6b (up to 31 bits)
                            if (bytes + 6 > buffer.length)
                            {
                                chars = i;
                                break;
                            }
                            buffer[bytes++] = (byte)(0xfc | (code >> 30));
                            buffer[bytes++] = (byte)(0x80 | ((code >> 24) & 0x3f));
                            buffer[bytes++] = (byte)(0x80 | ((code >> 18) & 0x3f));
                            buffer[bytes++] = (byte)(0x80 | ((code >> 12) & 0x3f));
                            buffer[bytes++] = (byte)(0x80 | ((code >> 6) & 0x3f));
                            buffer[bytes++] = (byte)(0x80 | (code & 0x3f));
                        }
                        else
                        {
                            buffer[bytes++] = (byte)('?');
                        }

                        _surrogate = 0; // USED

                        if (bytes == buffer.length)
                        {
                            // Buffer exactly full: the current character was consumed.
                            chars = i + 1;
                            break;
                        }
                    }
                }
                _bytes.setCount(bytes);

                _bytes.writeTo(_out);
                length -= chars;
                offset += chars;
            }
            return this;
        }
    }

    /**
     * An implementation of {@link WriteThroughWriter} that internally
     * uses {@link java.io.OutputStreamWriter}.
     * The converter writes into the scratch buffer, which is flushed and copied to the
     * wrapped {@link OutputStream} on every call to {@link #append(CharSequence)}.
     */
    private static class EncodingWriter extends WriteThroughWriter
    {
        final Writer _converter;

        public EncodingWriter(OutputStream out, String encoding) throws IOException
        {
            super(out);
            _converter = new OutputStreamWriter(_bytes, encoding);
        }

        public EncodingWriter(OutputStream out, Charset charset) throws IOException
        {
            super(out);
            _converter = new OutputStreamWriter(_bytes, charset);
        }

        @Override
        public WriteThroughWriter append(CharSequence charSequence) throws IOException
        {
            assert charSequence.length() <= getMaxWriteSize();

            _bytes.reset();
            _converter.append(charSequence);
            // Force the converter to push its bytes into the scratch buffer before copying.
            _converter.flush();
            _bytes.writeTo(_out);
            return this;
        }
    }

    /**
     * Get a zero copy subsequence of a {@link String}.
     * Use of this method can result in unforeseen GC consequences and can bypass
     * JVM optimizations available in {@link String#subSequence(int, int)}.  It should only
     * be used in cases where there is a known benefit: large sub sequence of a larger string with no retained
     * references to the sub sequence beyond the life time of the string.
     * <p>
     * A zero {@code length} always yields the empty string, whatever the {@code offset}.
     * @param string The {@link String} to take a subsequence of.
     * @param offset The offset in characters into the string to start the subsequence
     * @param length The length in characters of the substring
     * @return A new {@link CharSequence} containing the subsequence, backed by the passed {@link String}
     * or the original {@link String} if it is the same.
     * @throws NullPointerException if {@code string} is {@code null}
     * @throws IndexOutOfBoundsException if a non-empty range does not lie within the string
     */
    static CharSequence subSequence(String string, int offset, int length)
    {
        Objects.requireNonNull(string);

        if (offset == 0 && string.length() == length)
            return string;
        if (length == 0)
            return "";

        // An int overflow of offset + length makes end negative, which the check below rejects.
        int end = offset + length;
        if (offset < 0 || offset > end || end > string.length())
            throw new IndexOutOfBoundsException("offset and/or length out of range");

        return new CharSequence()
        {
            @Override
            public int length()
            {
                return length;
            }

            @Override
            public char charAt(int index)
            {
                // Check against this view, not the backing string, so that characters
                // outside the subsequence are never exposed.
                if (index < 0 || index >= length)
                    throw new IndexOutOfBoundsException("index out of range: " + index);
                return string.charAt(offset + index);
            }

            @Override
            public CharSequence subSequence(int start, int end)
            {
                if (start < 0 || start > end || end > length)
                    throw new IndexOutOfBoundsException("start and/or end out of range");
                return WriteThroughWriter.subSequence(string, offset + start, end - start);
            }

            @Override
            public String toString()
            {
                return string.substring(offset, offset + length);
            }
        };
    }

    /**
     * Get a zero copy subsequence of a {@code char} array.
     * A zero {@code length} always yields the empty string, whatever the {@code offset}.
     * @param chars The characters to take a subsequence of.  These characters are not copied and the array should not be
     * modified for the life of the returned CharSequence.
     * @param offset The offset in characters into the array to start the subsequence
     * @param length The length in characters of the subsequence
     * @return A new {@link CharSequence} containing the subsequence.
     * @throws IndexOutOfBoundsException if a non-empty range does not lie within the array
     */
    static CharSequence subSequence(char[] chars, int offset, int length)
    {
        // Needed to make bounds check of wrap the same as for string.substring
        if (length == 0)
            return "";
        return CharBuffer.wrap(chars, offset, length);
    }
}