org.opensearch.common.io.UTF8StreamWriter Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of opensearch Show documentation
Show all versions of opensearch Show documentation
OpenSearch subproject :server
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Modifications Copyright OpenSearch Contributors. See
* GitHub history for details.
*/
package org.opensearch.common.io;
import java.io.CharConversionException;
import java.io.IOException;
import java.io.OutputStream;
import java.io.Writer;
/**
* UTF8 Stream Writer.
*
* @opensearch.internal
*/
public final class UTF8StreamWriter extends Writer {
/**
* Holds the current output stream or null
if closed.
*/
private OutputStream _outputStream;
/**
* Holds the bytes' buffer.
*/
private final byte[] _bytes;
/**
* Holds the bytes buffer index.
*/
private int _index;
/**
* Creates a UTF-8 writer having a byte buffer of moderate capacity (2048).
*/
public UTF8StreamWriter() {
_bytes = new byte[2048];
}
/**
* Creates a UTF-8 writer having a byte buffer of specified capacity.
*
* @param capacity the capacity of the byte buffer.
*/
public UTF8StreamWriter(int capacity) {
_bytes = new byte[capacity];
}
/**
* Sets the output stream to use for writing until this writer is closed.
* For example:[code]
* Writer writer = new UTF8StreamWriter().setOutputStream(out);
* [/code] is equivalent but writes faster than [code]
* Writer writer = new java.io.OutputStreamWriter(out, "UTF-8");
* [/code]
*
* @param out the output stream.
* @return this UTF-8 writer.
* @throws IllegalStateException if this writer is being reused and
* it has not been {@link #close closed} or {@link #reset reset}.
*/
public UTF8StreamWriter setOutput(OutputStream out) {
if (_outputStream != null) throw new IllegalStateException("Writer not closed or reset");
_outputStream = out;
return this;
}
/**
* Writes a single character. This method supports 16-bits
* character surrogates.
*
* @param c char
the character to be written (possibly
* a surrogate).
* @throws IOException if an I/O error occurs.
*/
public void write(char c) throws IOException {
if ((c < 0xd800) || (c > 0xdfff)) {
write((int) c);
} else if (c < 0xdc00) { // High surrogate.
_highSurrogate = c;
} else { // Low surrogate.
int code = ((_highSurrogate - 0xd800) << 10) + (c - 0xdc00) + 0x10000;
write(code);
}
}
private char _highSurrogate;
/**
* Writes a character given its 31-bits Unicode.
*
* @param code the 31 bits Unicode of the character to be written.
* @throws IOException if an I/O error occurs.
*/
@Override
public void write(int code) throws IOException {
if ((code & 0xffffff80) == 0) {
_bytes[_index] = (byte) code;
if (++_index >= _bytes.length) {
flushBuffer();
}
} else { // Writes more than one byte.
write2(code);
}
}
private void write2(int c) throws IOException {
if ((c & 0xfffff800) == 0) { // 2 bytes.
_bytes[_index] = (byte) (0xc0 | (c >> 6));
if (++_index >= _bytes.length) {
flushBuffer();
}
_bytes[_index] = (byte) (0x80 | (c & 0x3f));
if (++_index >= _bytes.length) {
flushBuffer();
}
} else if ((c & 0xffff0000) == 0) { // 3 bytes.
_bytes[_index] = (byte) (0xe0 | (c >> 12));
if (++_index >= _bytes.length) {
flushBuffer();
}
_bytes[_index] = (byte) (0x80 | ((c >> 6) & 0x3f));
if (++_index >= _bytes.length) {
flushBuffer();
}
_bytes[_index] = (byte) (0x80 | (c & 0x3f));
if (++_index >= _bytes.length) {
flushBuffer();
}
} else if ((c & 0xff200000) == 0) { // 4 bytes.
_bytes[_index] = (byte) (0xf0 | (c >> 18));
if (++_index >= _bytes.length) {
flushBuffer();
}
_bytes[_index] = (byte) (0x80 | ((c >> 12) & 0x3f));
if (++_index >= _bytes.length) {
flushBuffer();
}
_bytes[_index] = (byte) (0x80 | ((c >> 6) & 0x3f));
if (++_index >= _bytes.length) {
flushBuffer();
}
_bytes[_index] = (byte) (0x80 | (c & 0x3f));
if (++_index >= _bytes.length) {
flushBuffer();
}
} else if ((c & 0xf4000000) == 0) { // 5 bytes.
_bytes[_index] = (byte) (0xf8 | (c >> 24));
if (++_index >= _bytes.length) {
flushBuffer();
}
_bytes[_index] = (byte) (0x80 | ((c >> 18) & 0x3f));
if (++_index >= _bytes.length) {
flushBuffer();
}
_bytes[_index] = (byte) (0x80 | ((c >> 12) & 0x3f));
if (++_index >= _bytes.length) {
flushBuffer();
}
_bytes[_index] = (byte) (0x80 | ((c >> 6) & 0x3f));
if (++_index >= _bytes.length) {
flushBuffer();
}
_bytes[_index] = (byte) (0x80 | (c & 0x3f));
if (++_index >= _bytes.length) {
flushBuffer();
}
} else if ((c & 0x80000000) == 0) { // 6 bytes.
_bytes[_index] = (byte) (0xfc | (c >> 30));
if (++_index >= _bytes.length) {
flushBuffer();
}
_bytes[_index] = (byte) (0x80 | ((c >> 24) & 0x3f));
if (++_index >= _bytes.length) {
flushBuffer();
}
_bytes[_index] = (byte) (0x80 | ((c >> 18) & 0x3f));
if (++_index >= _bytes.length) {
flushBuffer();
}
_bytes[_index] = (byte) (0x80 | ((c >> 12) & 0x3F));
if (++_index >= _bytes.length) {
flushBuffer();
}
_bytes[_index] = (byte) (0x80 | ((c >> 6) & 0x3F));
if (++_index >= _bytes.length) {
flushBuffer();
}
_bytes[_index] = (byte) (0x80 | (c & 0x3F));
if (++_index >= _bytes.length) {
flushBuffer();
}
} else {
throw new CharConversionException("Illegal character U+" + Integer.toHexString(c));
}
}
/**
* Writes a portion of an array of characters.
*
* @param cbuf the array of characters.
* @param off the offset from which to start writing characters.
* @param len the number of characters to write.
* @throws IOException if an I/O error occurs.
*/
@Override
public void write(char cbuf[], int off, int len) throws IOException {
final int off_plus_len = off + len;
for (int i = off; i < off_plus_len;) {
char c = cbuf[i++];
if (c < 0x80) {
_bytes[_index] = (byte) c;
if (++_index >= _bytes.length) {
flushBuffer();
}
} else {
write(c);
}
}
}
/**
* Writes a portion of a string.
*
* @param str a String.
* @param off the offset from which to start writing characters.
* @param len the number of characters to write.
* @throws IOException if an I/O error occurs
*/
@Override
public void write(String str, int off, int len) throws IOException {
final int off_plus_len = off + len;
for (int i = off; i < off_plus_len;) {
char c = str.charAt(i++);
if (c < 0x80) {
_bytes[_index] = (byte) c;
if (++_index >= _bytes.length) {
flushBuffer();
}
} else {
write(c);
}
}
}
/**
* Writes the specified character sequence.
*
* @param csq the character sequence.
* @throws IOException if an I/O error occurs
*/
public void write(CharSequence csq) throws IOException {
final int length = csq.length();
for (int i = 0; i < length;) {
char c = csq.charAt(i++);
if (c < 0x80) {
_bytes[_index] = (byte) c;
if (++_index >= _bytes.length) {
flushBuffer();
}
} else {
write(c);
}
}
}
/**
* Flushes the stream. If the stream has saved any characters from the
* various write() methods in a buffer, write them immediately to their
* intended destination. Then, if that destination is another character or
* byte stream, flush it. Thus one flush() invocation will flush all the
* buffers in a chain of Writers and OutputStreams.
*
* @throws IOException if an I/O error occurs.
*/
@Override
public void flush() throws IOException {
flushBuffer();
_outputStream.flush();
}
/**
* Closes and {@link #reset resets} this writer for reuse.
*
* @throws IOException if an I/O error occurs
*/
@Override
public void close() throws IOException {
if (_outputStream != null) {
flushBuffer();
_outputStream.close();
reset();
}
}
/**
* Flushes the internal bytes buffer.
*
* @throws IOException if an I/O error occurs
*/
private void flushBuffer() throws IOException {
if (_outputStream == null) throw new IOException("Stream closed");
_outputStream.write(_bytes, 0, _index);
_index = 0;
}
// Implements Reusable.
public void reset() {
_highSurrogate = 0;
_index = 0;
_outputStream = null;
}
}