All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.rsocket.util.CharByteBufUtil Maven / Gradle / Ivy

The newest version!
package io.rsocket.util;

import static io.netty.util.internal.StringUtil.isSurrogate;

import io.netty.buffer.ByteBuf;
import io.netty.util.CharsetUtil;
import io.netty.util.internal.MathUtil;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CoderResult;
import java.util.Arrays;

public class CharByteBufUtil {

  private static final byte WRITE_UTF_UNKNOWN = (byte) '?';

  private CharByteBufUtil() {}

  /**
   * Returns the exact bytes length of UTF8 character sequence.
   *
   * 

This method is producing the exact length according to {@link #writeUtf8(ByteBuf, char[])}. */ public static int utf8Bytes(final char[] seq) { return utf8ByteCount(seq, 0, seq.length); } /** * This method is producing the exact length according to {@link #writeUtf8(ByteBuf, char[], int, * int)}. */ public static int utf8Bytes(final char[] seq, int start, int end) { return utf8ByteCount(checkCharSequenceBounds(seq, start, end), start, end); } private static int utf8ByteCount(final char[] seq, int start, int end) { int i = start; // ASCII fast path while (i < end && seq[i] < 0x80) { ++i; } // !ASCII is packed in a separate method to let the ASCII case be smaller return i < end ? (i - start) + utf8BytesNonAscii(seq, i, end) : i - start; } private static int utf8BytesNonAscii(final char[] seq, final int start, final int end) { int encodedLength = 0; for (int i = start; i < end; i++) { final char c = seq[i]; // making it 100% branchless isn't rewarding due to the many bit operations necessary! if (c < 0x800) { // branchless version of: (c <= 127 ? 0:1) + 1 encodedLength += ((0x7f - c) >>> 31) + 1; } else if (isSurrogate(c)) { if (!Character.isHighSurrogate(c)) { encodedLength++; // WRITE_UTF_UNKNOWN continue; } final char c2; try { // Surrogate Pair consumes 2 characters. Optimistically try to get the next character to // avoid // duplicate bounds checking with charAt. c2 = seq[++i]; } catch (IndexOutOfBoundsException ignored) { encodedLength++; // WRITE_UTF_UNKNOWN break; } if (!Character.isLowSurrogate(c2)) { // WRITE_UTF_UNKNOWN + (Character.isHighSurrogate(c2) ? WRITE_UTF_UNKNOWN : c2) encodedLength += 2; continue; } // See http://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G2630. encodedLength += 4; } else { encodedLength += 3; } } return encodedLength; } private static char[] checkCharSequenceBounds(char[] seq, int start, int end) { if (MathUtil.isOutOfBounds(start, end - start, seq.length)) { throw new IndexOutOfBoundsException( "expected: 0 <= start(" + start + ") <= end (" + end + ") <= seq.length(" + seq.length + ')'); } return seq; } /** * Encode a {@code char[]} in UTF-8 and write it * into {@link ByteBuf}. * *

This method returns the actual number of bytes written. */ public static int writeUtf8(ByteBuf buf, char[] seq) { return writeUtf8(buf, seq, 0, seq.length); } /** * Equivalent to {@link #writeUtf8(ByteBuf, char[]) writeUtf8(buf, seq.subSequence(start, end), * reserveBytes)} but avoids subsequence object allocation if possible. * * @return actual number of bytes written */ public static int writeUtf8(ByteBuf buf, char[] seq, int start, int end) { return writeUtf8(buf, buf.writerIndex(), checkCharSequenceBounds(seq, start, end), start, end); } // Fast-Path implementation static int writeUtf8(ByteBuf buffer, int writerIndex, char[] seq, int start, int end) { int oldWriterIndex = writerIndex; // We can use the _set methods as these not need to do any index checks and reference checks. // This is possible as we called ensureWritable(...) before. for (int i = start; i < end; i++) { char c = seq[i]; if (c < 0x80) { buffer.setByte(writerIndex++, (byte) c); } else if (c < 0x800) { buffer.setByte(writerIndex++, (byte) (0xc0 | (c >> 6))); buffer.setByte(writerIndex++, (byte) (0x80 | (c & 0x3f))); } else if (isSurrogate(c)) { if (!Character.isHighSurrogate(c)) { buffer.setByte(writerIndex++, WRITE_UTF_UNKNOWN); continue; } final char c2; if (seq.length > ++i) { // Surrogate Pair consumes 2 characters. Optimistically try to get the next character to // avoid // duplicate bounds checking with charAt. If an IndexOutOfBoundsException is thrown we // will // re-throw a more informative exception describing the problem. c2 = seq[i]; } else { buffer.setByte(writerIndex++, WRITE_UTF_UNKNOWN); break; } // Extra method to allow inlining the rest of writeUtf8 which is the most likely code path. writerIndex = writeUtf8Surrogate(buffer, writerIndex, c, c2); } else { buffer.setByte(writerIndex++, (byte) (0xe0 | (c >> 12))); buffer.setByte(writerIndex++, (byte) (0x80 | ((c >> 6) & 0x3f))); buffer.setByte(writerIndex++, (byte) (0x80 | (c & 0x3f))); } } buffer.writerIndex(writerIndex); return writerIndex - oldWriterIndex; } private static int writeUtf8Surrogate(ByteBuf buffer, int writerIndex, char c, char c2) { if (!Character.isLowSurrogate(c2)) { buffer.setByte(writerIndex++, WRITE_UTF_UNKNOWN); buffer.setByte(writerIndex++, Character.isHighSurrogate(c2) ? WRITE_UTF_UNKNOWN : c2); return writerIndex; } int codePoint = Character.toCodePoint(c, c2); // See http://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G2630. buffer.setByte(writerIndex++, (byte) (0xf0 | (codePoint >> 18))); buffer.setByte(writerIndex++, (byte) (0x80 | ((codePoint >> 12) & 0x3f))); buffer.setByte(writerIndex++, (byte) (0x80 | ((codePoint >> 6) & 0x3f))); buffer.setByte(writerIndex++, (byte) (0x80 | (codePoint & 0x3f))); return writerIndex; } public static char[] readUtf8(ByteBuf byteBuf, int length) { CharsetDecoder charsetDecoder = CharsetUtil.UTF_8.newDecoder(); int en = (int) (length * (double) charsetDecoder.maxCharsPerByte()); char[] ca = new char[en]; CharBuffer charBuffer = CharBuffer.wrap(ca); ByteBuffer byteBuffer = byteBuf.nioBufferCount() == 1 ? byteBuf.internalNioBuffer(byteBuf.readerIndex(), length) : byteBuf.nioBuffer(byteBuf.readerIndex(), length); byteBuffer.mark(); try { CoderResult cr = charsetDecoder.decode(byteBuffer, charBuffer, true); if (!cr.isUnderflow()) cr.throwException(); cr = charsetDecoder.flush(charBuffer); if (!cr.isUnderflow()) cr.throwException(); byteBuffer.reset(); byteBuf.skipBytes(length); return safeTrim(charBuffer.array(), charBuffer.position()); } catch (CharacterCodingException x) { // Substitution is always enabled, // so this shouldn't happen throw new IllegalStateException("unable to decode char array from the given buffer", x); } } private static char[] safeTrim(char[] ca, int len) { if (len == ca.length) return ca; else return Arrays.copyOf(ca, len); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy