// org.eclipse.jetty.io.WriteThroughWriter Maven / Gradle / Ivy
//
// ========================================================================
// Copyright (c) 1995 Mort Bay Consulting Pty Ltd and others.
//
// This program and the accompanying materials are made available under the
// terms of the Eclipse Public License v. 2.0 which is available at
// https://www.eclipse.org/legal/epl-2.0, or the Apache License, Version 2.0
// which is available at https://www.apache.org/licenses/LICENSE-2.0.
//
// SPDX-License-Identifier: EPL-2.0 OR Apache-2.0
// ========================================================================
//
package org.eclipse.jetty.io;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Objects;
import org.eclipse.jetty.util.ByteArrayOutputStream2;
/**
 * An alternative to {@link java.io.OutputStreamWriter} that supports
 * several optimized implementations for well known {@link Charset}s,
 * specifically {@link StandardCharsets#UTF_8} and {@link StandardCharsets#ISO_8859_1}.
 * The implementations of this class will never buffer characters or bytes beyond a call to the
 * {@link #write(char[], int, int)} method, thus written characters will always be passed
 * as bytes to the passed {@link OutputStream}.
 */
public abstract class WriteThroughWriter extends Writer
{
// Maximum write size used when a constructor is given no size or a size that is not positive.
static final int DEFAULT_MAX_WRITE_SIZE = 1024;
// Largest number of characters the write(...) methods pass to a single append(CharSequence) call.
private final int _maxWriteSize;
// The stream that receives the converted bytes.
final OutputStream _out;
// Reusable byte buffer, created with a capacity of _maxWriteSize, that holds converted bytes
// until they are written to _out.
final ByteArrayOutputStream2 _bytes;
/**
 * Construct a writer that uses {@link #DEFAULT_MAX_WRITE_SIZE} as its maximum write size.
 * @param out The {@link OutputStream} to write the converted bytes to.
 */
protected WriteThroughWriter(OutputStream out)
{
    this(out, DEFAULT_MAX_WRITE_SIZE);
}
/**
 * Construct a {@link WriteThroughWriter} with an explicit maximum write size.
 * @param out The {@link OutputStream} to write the converted bytes to.
 * @param maxWriteSize The maximum size in characters of a single conversion; a value that is
 * zero or negative selects {@link #DEFAULT_MAX_WRITE_SIZE}.
 */
protected WriteThroughWriter(OutputStream out, int maxWriteSize)
{
    if (maxWriteSize > 0)
        _maxWriteSize = maxWriteSize;
    else
        _maxWriteSize = DEFAULT_MAX_WRITE_SIZE;

    _out = out;
    // The conversion buffer starts with one byte of capacity per character of a maximum write.
    _bytes = new ByteArrayOutputStream2(_maxWriteSize);
}
/**
 * Obtain a new {@link Writer} that converts characters written to bytes
 * written to an {@link OutputStream}.
 * <p>The canonical names of {@link StandardCharsets#ISO_8859_1} and {@link StandardCharsets#UTF_8}
 * are matched ignoring case and select the optimized implementations; any other name is
 * passed to an implementation based on {@link OutputStreamWriter}.</p>
 * @param outputStream The {@link OutputStream} to write to.
 * @param charset The {@link Charset} name.
 * @return A Writer that will convert the characters written to it into bytes of the named charset.
 * @throws IOException If there is a problem creating the {@link Writer}.
 */
public static WriteThroughWriter newWriter(OutputStream outputStream, String charset)
    throws IOException
{
    final String latin1Name = StandardCharsets.ISO_8859_1.name();
    final String utf8Name = StandardCharsets.UTF_8.name();

    final WriteThroughWriter writer;
    if (latin1Name.equalsIgnoreCase(charset))
        writer = new Iso88591Writer(outputStream);
    else if (utf8Name.equalsIgnoreCase(charset))
        writer = new Utf8Writer(outputStream);
    else
        writer = new EncodingWriter(outputStream, charset);
    return writer;
}
/**
 * Obtain a new {@link Writer} that converts characters written to bytes
 * written to an {@link OutputStream}.
 * <p>A charset equal to {@link StandardCharsets#ISO_8859_1} or {@link StandardCharsets#UTF_8}
 * selects the optimized implementation for that charset; any other charset is
 * passed to an implementation based on {@link OutputStreamWriter}.</p>
 * @param outputStream The {@link OutputStream} to write to.
 * @param charset The {@link Charset}.
 * @return A Writer that will convert the characters written to it into bytes of the charset.
 * @throws IOException If there is a problem creating the {@link Writer}.
 */
public static WriteThroughWriter newWriter(OutputStream outputStream, Charset charset)
    throws IOException
{
    // Both well known charsets are compared with equals() rather than by identity, so that a
    // Charset instance that is equal to, but not the same object as, the StandardCharsets
    // constant still gets the optimized writer.
    if (StandardCharsets.ISO_8859_1.equals(charset))
        return new Iso88591Writer(outputStream);
    if (StandardCharsets.UTF_8.equals(charset))
        return new Utf8Writer(outputStream);
    return new EncodingWriter(outputStream, charset);
}
/**
* Get the maximum write size.
* @return The maximum number of characters that the {@code write} methods of this class pass to a
* single {@link #append(CharSequence)} call. This is the value given to the constructor, or
* {@link #DEFAULT_MAX_WRITE_SIZE} if that value was not positive.
*/
public int getMaxWriteSize()
{
return _maxWriteSize;
}
/**
* Close the {@link OutputStream} that this writer writes to.
* @throws IOException If the {@link OutputStream} fails to close.
*/
@Override
public void close() throws IOException
{
_out.close();
}
/**
* Flush the {@link OutputStream} that this writer writes to.
* @throws IOException If the {@link OutputStream} fails to flush.
*/
@Override
public void flush() throws IOException
{
_out.flush();
}
/**
* Convert the passed characters to bytes and write them to the {@link OutputStream}.
* The {@link #write(String, int, int)} and {@link #write(char[], int, int)} methods call this
* method with sequences of at most {@link #getMaxWriteSize()} characters, and the implementations
* in this file assert that limit.
* @param sequence The characters to convert and write.
* @return This writer.
* @throws IOException If there is a problem converting or writing.
*/
@Override
public abstract WriteThroughWriter append(CharSequence sequence) throws IOException;
/**
 * Write a portion of a {@link String}.
 * <p>The portion is passed to {@link #append(CharSequence)} as zero copy subsequences of at most
 * {@link #getMaxWriteSize()} characters each; the last (or only) subsequence holds the remainder.</p>
 * @param string The {@link String} to write from.
 * @param offset The offset in characters of the first character to write.
 * @param length The number of characters to write.
 * @throws IOException If there is a problem converting or writing.
 */
@Override
public void write(String string, int offset, int length) throws IOException
{
    int position = offset;
    int remaining = length;

    // Full sized chunks first...
    for (; remaining > _maxWriteSize; position += _maxWriteSize, remaining -= _maxWriteSize)
    {
        append(subSequence(string, position, _maxWriteSize));
    }

    // ...then whatever is left, which is always appended even when it is empty.
    append(subSequence(string, position, remaining));
}
/**
 * Write a portion of a {@code char} array.
 * <p>The portion is passed to {@link #append(CharSequence)} as zero copy subsequences of at most
 * {@link #getMaxWriteSize()} characters each; the last (or only) subsequence holds the remainder.</p>
 * @param chars The array to write from.
 * @param offset The offset in characters of the first character to write.
 * @param length The number of characters to write.
 * @throws IOException If there is a problem converting or writing.
 */
@Override
public void write(char[] chars, int offset, int length) throws IOException
{
    int position = offset;
    int remaining = length;

    // Full sized chunks first...
    for (; remaining > _maxWriteSize; position += _maxWriteSize, remaining -= _maxWriteSize)
    {
        append(subSequence(chars, position, _maxWriteSize));
    }

    // ...then whatever is left, which is always appended even when it is empty.
    append(subSequence(chars, position, remaining));
}
/**
 * An implementation of {@link WriteThroughWriter} for
 * optimal ISO-8859-1 conversion.
 * <p>The ISO-8859-1 encoding is done by this class and no additional
 * buffers or Writers are used. A character with a value of 256 or more
 * is written as {@code '?'}.</p>
 */
private static class Iso88591Writer extends WriteThroughWriter
{
    private Iso88591Writer(OutputStream out)
    {
        super(out);
    }

    /**
     * Convert the passed characters to ISO-8859-1 and write them to the {@link OutputStream}.
     * <p>A single character is written directly with {@link OutputStream#write(int)}. Any other
     * sequence is converted into the shared byte buffer and written from there. A sequence
     * longer than that buffer is converted and written in buffer sized chunks, so that a direct
     * call with a long sequence cannot overrun the buffer when assertions are disabled.</p>
     * @param charSequence The characters to convert and write.
     * @return This writer.
     * @throws IOException If there is a problem writing to the {@link OutputStream}.
     */
    @Override
    public WriteThroughWriter append(CharSequence charSequence) throws IOException
    {
        assert charSequence.length() <= getMaxWriteSize();

        int length = charSequence.length();
        if (length == 1)
        {
            int c = charSequence.charAt(0);
            _out.write(c < 256 ? c : '?');
            return this;
        }

        byte[] buffer = _bytes.getBuf();
        int offset = 0;

        // A do/while is used so that an empty sequence still results in one (empty) write.
        do
        {
            // One byte per character, so a chunk can never be larger than the buffer.
            int chunk = Math.min(length - offset, buffer.length);
            _bytes.reset();
            for (int i = 0; i < chunk; i++)
            {
                int c = charSequence.charAt(offset + i);
                buffer[i] = (byte)(c < 256 ? c : '?');
            }
            // The bytes were stored directly in the backing array, so record how many there are.
            _bytes.setCount(chunk);
            _bytes.writeTo(_out);
            offset += chunk;
        }
        while (offset < length);

        return this;
    }
}
/**
* An implementation of {@link WriteThroughWriter} for
* an optimal UTF-8 conversion.
* The UTF-8 encoding is done by this class and no additional
* buffers or Writers are used.
* A high surrogate that has not yet been followed by another character is remembered in
* {@link #_surrogate}, so a surrogate pair may be split across chunks or across calls to
* {@link #append(CharSequence)}.
* The UTF-8 code was inspired by ...
*/
private static class Utf8Writer extends WriteThroughWriter
{
// The pending high surrogate waiting for the next character, or 0 when there is none.
int _surrogate = 0;
private Utf8Writer(OutputStream out)
{
super(out);
}
/**
* Convert the passed characters to UTF-8 and write them to the {@link OutputStream}.
* The characters are converted in chunks: each pass of the outer loop resets the shared byte
* buffer, encodes as many characters as fit in it, and writes the resulting bytes.
* @param charSequence The characters to convert and write.
* @return This writer.
* @throws IOException If there is a problem writing to the {@link OutputStream}.
*/
@Override
public WriteThroughWriter append(CharSequence charSequence) throws IOException
{
assert charSequence.length() <= getMaxWriteSize();
int length = charSequence.length();
int offset = 0;
// length is the number of characters still to be consumed, offset the index of the next one.
while (length > 0)
{
_bytes.reset();
int chars = Math.min(length, getMaxWriteSize());
byte[] buffer = _bytes.getBuf();
int bytes = _bytes.getCount();
// Do not attempt more characters than there are free bytes in the buffer.
if (bytes + chars > buffer.length)
chars = buffer.length - bytes;
// chars is the number of characters consumed by this pass; it is reduced when the loop
// below has to stop early because the buffer is full.
for (int i = 0; i < chars; i++)
{
int code = charSequence.charAt(offset + i);
// Do we already have a surrogate?
if (_surrogate == 0)
{
// No - is this char code a surrogate?
if (Character.isHighSurrogate((char)code))
{
// Remember it and wait for the next character before encoding anything.
_surrogate = code; // UCS-?
continue;
}
}
// else handle a low surrogate
else if (Character.isLowSurrogate((char)code))
{
code = Character.toCodePoint((char)_surrogate, (char)code); // UCS-4
}
// else UCS-2
else
{
// The pending high surrogate was not followed by a low surrogate: encode the pending
// surrogate by itself and step back so that the current character is processed again.
code = _surrogate; // UCS-2
_surrogate = 0; // USED
i--;
}
// Encode the code point. Each multi-byte branch first checks that the whole encoding fits;
// if not, it sets chars to i and breaks so that the next pass restarts from this character.
if ((code & 0xffffff80) == 0)
{
// 1b
if (bytes >= buffer.length)
{
chars = i;
break;
}
buffer[bytes++] = (byte)(code);
}
else
{
if ((code & 0xfffff800) == 0)
{
// 2b
if (bytes + 2 > buffer.length)
{
chars = i;
break;
}
buffer[bytes++] = (byte)(0xc0 | (code >> 6));
buffer[bytes++] = (byte)(0x80 | (code & 0x3f));
}
else if ((code & 0xffff0000) == 0)
{
// 3b
if (bytes + 3 > buffer.length)
{
chars = i;
break;
}
buffer[bytes++] = (byte)(0xe0 | (code >> 12));
buffer[bytes++] = (byte)(0x80 | ((code >> 6) & 0x3f));
buffer[bytes++] = (byte)(0x80 | (code & 0x3f));
}
else if ((code & 0xff200000) == 0)
{
// 4b
if (bytes + 4 > buffer.length)
{
chars = i;
break;
}
buffer[bytes++] = (byte)(0xf0 | (code >> 18));
buffer[bytes++] = (byte)(0x80 | ((code >> 12) & 0x3f));
buffer[bytes++] = (byte)(0x80 | ((code >> 6) & 0x3f));
buffer[bytes++] = (byte)(0x80 | (code & 0x3f));
}
// NOTE(review): code is either a single char or the result of Character.toCodePoint, so it
// should never exceed 0x10FFFF and the branches below look unreachable - confirm before relying on them.
else if ((code & 0xf4000000) == 0)
{
// 5b
if (bytes + 5 > buffer.length)
{
chars = i;
break;
}
buffer[bytes++] = (byte)(0xf8 | (code >> 24));
buffer[bytes++] = (byte)(0x80 | ((code >> 18) & 0x3f));
buffer[bytes++] = (byte)(0x80 | ((code >> 12) & 0x3f));
buffer[bytes++] = (byte)(0x80 | ((code >> 6) & 0x3f));
buffer[bytes++] = (byte)(0x80 | (code & 0x3f));
}
else if ((code & 0x80000000) == 0)
{
// 6b
if (bytes + 6 > buffer.length)
{
chars = i;
break;
}
buffer[bytes++] = (byte)(0xfc | (code >> 30));
buffer[bytes++] = (byte)(0x80 | ((code >> 24) & 0x3f));
buffer[bytes++] = (byte)(0x80 | ((code >> 18) & 0x3f));
buffer[bytes++] = (byte)(0x80 | ((code >> 12) & 0x3f));
buffer[bytes++] = (byte)(0x80 | ((code >> 6) & 0x3f));
buffer[bytes++] = (byte)(0x80 | (code & 0x3f));
}
else
{
buffer[bytes++] = (byte)('?');
}
// Only reached when the multi-byte encoding was stored: the pending surrogate (if any) is
// now consumed. The early breaks above skip this, leaving the surrogate pending.
_surrogate = 0; // USED
// The buffer is exactly full: the current character is consumed, so stop after it.
if (bytes == buffer.length)
{
chars = i + 1;
break;
}
}
}
// The bytes were stored directly in the backing array, so record how many there are and
// write them out before advancing past the characters consumed by this pass.
_bytes.setCount(bytes);
_bytes.writeTo(_out);
length -= chars;
offset += chars;
}
return this;
}
}
/**
 * A {@link WriteThroughWriter} for any charset, which leaves the conversion
 * to a {@link java.io.OutputStreamWriter}.
 * <p>The converter writes into the shared byte buffer, and the content of that buffer is
 * written to the target {@link OutputStream} at the end of every {@link #append(CharSequence)}.</p>
 */
private static class EncodingWriter extends WriteThroughWriter
{
    /** Converts characters to bytes, which it writes into the shared byte buffer. */
    final Writer _converter;

    /**
     * @param out The {@link OutputStream} to write the converted bytes to.
     * @param encoding The name of the charset to convert to.
     * @throws IOException If the converter cannot be created for the named charset.
     */
    public EncodingWriter(OutputStream out, String encoding) throws IOException
    {
        super(out);
        _converter = new OutputStreamWriter(_bytes, encoding);
    }

    /**
     * @param out The {@link OutputStream} to write the converted bytes to.
     * @param charset The {@link Charset} to convert to.
     * @throws IOException Declared for symmetry with the other constructor.
     */
    public EncodingWriter(OutputStream out, Charset charset) throws IOException
    {
        super(out);
        _converter = new OutputStreamWriter(_bytes, charset);
    }

    /**
     * Convert the passed characters with the converter and write the resulting bytes.
     * @param charSequence The characters to convert and write.
     * @return This writer.
     * @throws IOException If there is a problem converting or writing.
     */
    @Override
    public WriteThroughWriter append(CharSequence charSequence) throws IOException
    {
        assert charSequence.length() <= getMaxWriteSize();

        // Start from an empty buffer, then push the characters through the converter and
        // flush it so that the converted bytes arrive in the buffer.
        _bytes.reset();
        _converter.append(charSequence).flush();

        // Hand the converted bytes to the real destination.
        _bytes.writeTo(_out);
        return this;
    }
}
/**
 * Get a zero copy subsequence of a {@link String}.
 * <p>Use of this method can result in unforeseen GC consequences and can bypass
 * JVM optimizations available in {@link String#subSequence(int, int)}. It should only
 * be used in cases where there is a known benefit: a large subsequence of a larger string with
 * no retained references to the subsequence beyond the lifetime of the string.</p>
 * @param string The {@link String} to take a subsequence of.
 * @param offset The offset in characters into the string to start the subsequence
 * @param length The length in characters of the substring
 * @return The original {@link String} if the subsequence covers all of it, the empty string if
 * {@code length} is zero, otherwise a new {@link CharSequence} backed by the passed {@link String}.
 * @throws NullPointerException If the string is {@code null}.
 * @throws IndexOutOfBoundsException If a non-empty subsequence is not within the string.
 */
static CharSequence subSequence(String string, int offset, int length)
{
    Objects.requireNonNull(string);

    // Shortcuts that need no view: the whole string, or nothing at all.
    if (offset == 0 && length == string.length())
        return string;
    if (length == 0)
        return "";

    final int limit = offset + length;
    final boolean withinString = offset >= 0 && offset <= limit && limit <= string.length();
    if (!withinString)
        throw new IndexOutOfBoundsException("offset and/or length out of range");

    // A window onto the string: indexes are shifted by offset and no characters are copied.
    final class StringView implements CharSequence
    {
        @Override
        public int length()
        {
            return length;
        }

        @Override
        public char charAt(int index)
        {
            return string.charAt(offset + index);
        }

        @Override
        public CharSequence subSequence(int start, int end)
        {
            return WriteThroughWriter.subSequence(string, offset + start, end - start);
        }

        @Override
        public String toString()
        {
            return string.substring(offset, limit);
        }
    }

    return new StringView();
}
/**
 * Get a zero copy subsequence of a {@code char} array.
 * @param chars The characters to take a subsequence of. These characters are not copied and the
 * array should not be modified for the life of the returned CharSequence.
 * @param offset The offset in characters into the array to start the subsequence
 * @param length The length in characters of the subsequence
 * @return The empty string if {@code length} is zero, otherwise a new {@link CharSequence}
 * that wraps the given range of the array.
 */
static CharSequence subSequence(char[] chars, int offset, int length)
{
    // A zero length request is answered before CharBuffer.wrap is reached, so no bounds
    // check is made in that case (needed to make the bounds check the same as for string.substring).
    return length == 0 ? "" : CharBuffer.wrap(chars, offset, length);
}
}