All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.openhft.chronicle.bytes.AppendableUtil Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (c) 2016-2022 chronicle.software
 *
 *     https://chronicle.software
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package net.openhft.chronicle.bytes;

import net.openhft.chronicle.bytes.internal.BytesInternal;
import net.openhft.chronicle.bytes.internal.NativeBytesStore;
import net.openhft.chronicle.core.Maths;
import net.openhft.chronicle.core.annotation.Java9;
import net.openhft.chronicle.core.annotation.NonNegative;
import net.openhft.chronicle.core.io.ClosedIllegalStateException;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;

import java.io.IOException;
import java.io.UTFDataFormatException;
import java.nio.BufferOverflowException;
import java.nio.BufferUnderflowException;

import static net.openhft.chronicle.core.util.ObjectUtils.requireNonNull;

/**
 * Static helpers for working with {@link Appendable} targets such as {@link StringBuilder} and
 * {@link Bytes}, together with routines that compute the UTF-8 encoded length of character data.
 * <p>
 * Most methods dispatch on the runtime type of the target with {@code instanceof} and only
 * support {@code StringBuilder} and {@code Bytes}.
 */
@SuppressWarnings("rawtypes")
public enum AppendableUtil {

    ; // Enum with no instances signifies a utility class.

    /** Message prefix used for every {@link UTFDataFormatException} raised by this class. */
    private static final String MALFORMED_INPUT_AROUND_BYTE = "malformed input around byte ";

    /**
     * Sets a character at a specified index in the given Appendable.
     * <p>
     * For a {@code StringBuilder} this is {@code setCharAt(index, ch)}; for a {@code Bytes} the
     * character is handed to {@code writeByte(index, ch)}, with {@code index} used as the offset.
     *
     * @param sb    the Appendable to modify
     * @param index the index at which to set the character
     * @param ch    the character to set
     * @throws IllegalArgumentException If the Appendable is not of type StringBuilder or Bytes
     * @throws BufferOverflowException  If the index is larger than the buffer's capacity
     */
    public static void setCharAt(@NotNull Appendable sb, @NonNegative int index, char ch)
            throws IllegalArgumentException, BufferOverflowException {
        if (sb instanceof StringBuilder)
            ((StringBuilder) sb).setCharAt(index, ch);
        else if (sb instanceof Bytes)
            ((Bytes) sb).writeByte(index, ch);
        else
            throw new IllegalArgumentException(String.valueOf(sb.getClass()));
    }

    /**
     * Parses a UTF-8 BytesStore into a StringBuilder.
     * <p>
     * Delegates to {@code BytesInternal.parseUtf8}, starting at the store's current read position.
     *
     * @param bs     the BytesStore to parse
     * @param sb     the StringBuilder to append to
     * @param utf    whether to parse as UTF-8
     * @param length the length of characters to parse
     * @throws UTFDataFormatRuntimeException If invalid UTF-8 sequence is encountered
     * @throws BufferUnderflowException      If the BytesStore doesn't contain enough data
     * @throws ClosedIllegalStateException   If the resource has been released or closed.
     */
    public static void parseUtf8(@NotNull BytesStore bs, StringBuilder sb, boolean utf, @NonNegative int length)
            throws UTFDataFormatRuntimeException, BufferUnderflowException, ClosedIllegalStateException {
        BytesInternal.parseUtf8(bs, bs.readPosition(), sb, utf, length);
    }

    /**
     * Sets the length of the given Appendable.
     * <p>
     * For a {@code StringBuilder} this is {@code setLength(newLength)}; for a {@code Bytes} it is
     * {@code readPositionRemaining(0, newLength)}.
     *
     * @param sb        the Appendable to modify
     * @param newLength the new length to set
     * @throws IllegalArgumentException    If the Appendable is not of type StringBuilder or Bytes
     * @throws ClosedIllegalStateException If the resource has been released or closed.
     * @throws BufferUnderflowException    If the new length is greater than the Bytes's capacity
     */
    public static void setLength(@NotNull Appendable sb, @NonNegative int newLength)
            throws IllegalArgumentException, ClosedIllegalStateException, BufferUnderflowException {
        requireNonNull(sb);
        if (sb instanceof StringBuilder)
            ((StringBuilder) sb).setLength(newLength);
        else if (sb instanceof Bytes)
            ((Bytes) sb).readPositionRemaining(0, newLength);
        else
            throw new IllegalArgumentException(String.valueOf(sb.getClass()));
    }

    /**
     * Appends a double value to the given Appendable.
     *
     * @param sb    the Appendable to append to
     * @param value the double value to append
     * @throws IllegalArgumentException    If the Appendable is not of type StringBuilder or Bytes
     * @throws BufferOverflowException     If there is not enough space in the Bytes to append the value
     * @throws ClosedIllegalStateException If the resource has been released or closed.
     */
    public static void append(@NotNull Appendable sb, double value)
            throws IllegalArgumentException, BufferOverflowException, ClosedIllegalStateException {
        if (sb instanceof StringBuilder)
            ((StringBuilder) sb).append(value);
        else if (sb instanceof Bytes)
            ((Bytes) sb).append(value);
        else
            throw new IllegalArgumentException(String.valueOf(sb.getClass()));
    }

    /**
     * Appends a long value to the given Appendable.
     *
     * @param sb    the Appendable to append to
     * @param value the long value to append
     * @throws IllegalArgumentException    If the Appendable is not of type StringBuilder or Bytes
     * @throws BufferOverflowException     If there is not enough space in the Bytes to append the value
     * @throws ClosedIllegalStateException If the resource has been released or closed.
     */
    public static void append(@NotNull Appendable sb, long value)
            throws IllegalArgumentException, BufferOverflowException, ClosedIllegalStateException {
        if (sb instanceof StringBuilder)
            ((StringBuilder) sb).append(value);
        else if (sb instanceof Bytes)
            ((Bytes) sb).append(value);
        else
            throw new IllegalArgumentException(String.valueOf(sb.getClass()));
    }

    /**
     * Appends a string to an Appendable that also implements CharSequence.
     * <p>
     * An {@link IOException} from {@code sb.append} is rethrown wrapped in an {@link AssertionError}.
     *
     * @param <C> a type that is both an {@link Appendable} and a {@link CharSequence}
     * @param sb  the Appendable to append to
     * @param str the String to append
     */
    public static <C extends Appendable & CharSequence> void append(@NotNull C sb, String str) {
        try {
            sb.append(str);
        } catch (IOException e) {
            throw new AssertionError(e);
        }
    }

    /**
     * Reads 8-bit characters from a StreamingDataInput and appends them to a StringBuilder
     * until a stop character as defined by the given StopCharsTester is encountered.
     * <p>
     * Each iteration reads one unsigned byte and asks the tester about it together with the
     * following (peeked) byte. The byte that triggers the stop has already been read and is not
     * appended. The loop also ends once {@code readRemaining()} reports 0 after an append.
     *
     * @param bytes      the StreamingDataInput to read from
     * @param appendable the StringBuilder to append to
     * @param tester     the StopCharsTester defining the stop character
     * @throws ClosedIllegalStateException If the resource has been released or closed.
     */
    public static void read8bitAndAppend(@NotNull StreamingDataInput bytes,
                                         @NotNull StringBuilder appendable,
                                         @NotNull StopCharsTester tester)
            throws ClosedIllegalStateException {
        while (true) {
            int c = bytes.readUnsignedByte();
            if (tester.isStopChar(c, bytes.peekUnsignedByte()))
                return;
            appendable.append((char) c);
            if (bytes.readRemaining() == 0)
                return;
        }
    }

    /**
     * Reads UTF-8 encoded characters from a StreamingDataInput and appends them to an Appendable
     * until a stop character as defined by the given StopCharsTester is encountered.
     * <p>
     * This is {@link #readUtf8AndAppend} with its checked {@link IOException} converted: any
     * {@code IOException} it throws, including the {@link UTFDataFormatException} raised for
     * malformed input, is rethrown wrapped in an {@link AssertionError}.
     *
     * @param bytes      the StreamingDataInput to read from
     * @param appendable the Appendable to append to
     * @param tester     the StopCharsTester defining the stop character
     * @throws BufferUnderflowException    If the StreamingDataInput is exhausted
     * @throws ClosedIllegalStateException If the resource has been released or closed.
     */
    public static void readUTFAndAppend(@NotNull StreamingDataInput bytes,
                                        @NotNull Appendable appendable,
                                        @NotNull StopCharsTester tester)
            throws BufferUnderflowException, ClosedIllegalStateException {
        try {
            readUtf8AndAppend(bytes, appendable, tester);
        } catch (IOException e) {
            throw new AssertionError(e);
        }
    }

    /**
     * Reads UTF-8 encoded characters from a StreamingDataInput and appends them to an Appendable
     * until a stop character as defined by the given StopCharsTester is encountered.
     * <p>
     * The method has two phases. The first loop handles single-byte input only; on the first byte
     * that is 128 or above it steps back one byte and hands over to the second loop, which
     * decodes one-, two- and three-byte sequences. Lead bytes of the form {@code 10xxxxxx} or
     * {@code 1111xxxx} are rejected as malformed.
     *
     * @param bytes      the StreamingDataInput to read from
     * @param appendable the Appendable to append to
     * @param tester     the StopCharsTester defining the stop character
     * @throws BufferUnderflowException    If the StreamingDataInput is exhausted
     * @throws IOException                 If the input is malformed (as a
     *                                     {@link UTFDataFormatException}) or the Appendable fails
     * @throws ClosedIllegalStateException If the resource has been released or closed.
     */
    public static void readUtf8AndAppend(@NotNull StreamingDataInput bytes,
                                         @NotNull Appendable appendable,
                                         @NotNull StopCharsTester tester)
            throws BufferUnderflowException, IOException, ClosedIllegalStateException {
        // Phase 1: single-byte characters only.
        while (true) {
            int c = bytes.readUnsignedByte();
            // A byte of 128 or above starts a multi-byte sequence: un-read it and switch to phase 2.
            if (c >= 128) {
                bytes.readSkip(-1);
                break;
            }

            // Keep a literal "[]" pair together (e.g. the suffix of an array type name such as
            // byte[]): both characters are appended without consulting the tester.
            if (c == '[' && bytes.peekUnsignedByte() == ']') {
                appendable.append((char) c);
                appendable.append((char) bytes.readUnsignedByte());
                if (bytes.readRemaining() == 0)
                    return;
                continue;
            }

            // If the stop character is encountered, return; it has been read but is not appended.
            if (tester.isStopChar(c, bytes.peekUnsignedByte()))
                return;
            appendable.append((char) c);
            if (bytes.readRemaining() == 0)
                return;
        }

        // Phase 2: full decoding; the loop ends when readUnsignedByte() returns a negative value.
        for (int c; (c = bytes.readUnsignedByte()) >= 0; ) {
            // The top four bits of the lead byte select the sequence length.
            switch (c >> 4) {
                // 1-byte character (0xxxxxxx): append it as is.
                case 0:
                case 1:
                case 2:
                case 3:
                case 4:
                case 5:
                case 6:
                case 7:
                    /* 0xxxxxxx */
                    if (tester.isStopChar(c, bytes.peekUnsignedByte()))
                        return;
                    appendable.append((char) c);
                    break;

                // 2-byte character (110x xxxx 10xx xxxx): validate the continuation byte, decode, append.
                case 12:
                case 13: {
                    /* 110x xxxx 10xx xxxx */
                    int char2 = bytes.readUnsignedByte();
                    if ((char2 & 0xC0) != 0x80)
                        throw newUTFDataFormatException(char2);
                    int c2 = (char) (((c & 0x1F) << 6) |
                            (char2 & 0x3F));
                    if (tester.isStopChar(c2, bytes.peekUnsignedByte()))
                        return;
                    appendable.append((char) c2);
                    break;
                }

                // 3-byte character (1110 xxxx 10xx xxxx 10xx xxxx): validate both continuation bytes, decode, append.
                case 14: {
                    /* 1110 xxxx 10xx xxxx 10xx xxxx */
                    int char2 = bytes.readUnsignedByte();
                    int char3 = bytes.readUnsignedByte();

                    if (((char2 & 0xC0) != 0x80))
                        throw newUTFDataFormatException(char2);
                    if ((char3 & 0xC0) != 0x80)
                        throw newUTFDataFormatException(char3);
                    int c3 = (char) (((c & 0x0F) << 12) |
                            ((char2 & 0x3F) << 6) |
                            (char3 & 0x3F));
                    if (tester.isStopChar(c3, bytes.peekUnsignedByte()))
                        return;
                    appendable.append((char) c3);
                    break;
                }

                default:
                    // Anything else is not an accepted lead byte here.
                    /* 10xx xxxx, 1111 xxxx */
                    throw newUTFDataFormatException(c);
            }
        }
    }

    /**
     * Builds the exception used to report a malformed byte; the byte is rendered in hexadecimal.
     *
     * @param c the offending byte value
     * @return a new exception, to be thrown by the caller
     */
    private static UTFDataFormatException newUTFDataFormatException(final int c) {
        return new UTFDataFormatException(MALFORMED_INPUT_AROUND_BYTE + Integer.toHexString(c));
    }

    /**
     * Parses a sequence of 8-bit characters from the given Bytes input and appends them to a StringBuilder.
     * <p>
     * The underlying store of {@code bytes} is cast to {@code NativeBytesStore} without a check,
     * so callers must only pass natively backed Bytes (see {@link #parse8bit}). The read position
     * is advanced by the count that {@code BytesInternal.parse8bit_SB1} returns.
     *
     * @param bytes  the input Bytes to read from
     * @param sb     the StringBuilder to append the characters to
     * @param length the number of characters to read
     * @throws BufferUnderflowException    If there are not enough characters available in the input
     * @throws ClosedIllegalStateException If the resource has been released or closed.
     */
    public static void parse8bit_SB1(@NotNull Bytes bytes, @NotNull StringBuilder sb, @NonNegative int length)
            throws BufferUnderflowException, ClosedIllegalStateException {
        if (length > bytes.readRemaining())
            throw new BufferUnderflowException();
        @Nullable NativeBytesStore nbs = (NativeBytesStore) bytes.bytesStore();
        long offset = bytes.readPosition();
        int count = BytesInternal.parse8bit_SB1(offset, nbs, sb, length);
        bytes.readSkip(count);
    }

    /**
     * Parses a sequence of 8-bit characters from the given StreamingDataInput and appends them to an Appendable.
     * <p>
     * A {@code StringBuilder} target fed from a {@code Bytes} backed by a {@code NativeBytesStore}
     * goes through {@link #parse8bit_SB1}; every other combination is delegated to
     * {@code BytesInternal.parse8bit1}.
     *
     * @param bytes      the input StreamingDataInput to read from
     * @param appendable the Appendable to append the characters to
     * @param utflen     the number of characters to read
     * @throws BufferUnderflowException    If there are not enough characters available in the input
     * @throws IOException                 If an I/O error occurs
     * @throws ClosedIllegalStateException If the resource has been released or closed.
     */
    public static void parse8bit(@NotNull StreamingDataInput bytes, Appendable appendable, @NonNegative int utflen)
            throws BufferUnderflowException, IOException, ClosedIllegalStateException {
        if (appendable instanceof StringBuilder) {
            @NotNull final StringBuilder sb = (StringBuilder) appendable;
            if (bytes instanceof Bytes && ((Bytes) bytes).bytesStore() instanceof NativeBytesStore) {
                parse8bit_SB1((Bytes) bytes, sb, utflen);
            } else {
                BytesInternal.parse8bit1(bytes, sb, utflen);
            }
        } else {
            BytesInternal.parse8bit1(bytes, appendable, utflen);
        }
    }

    /**
     * Appends a subsequence of the specified CharSequence to an Appendable.
     * <p>
     * {@code len} is a character count in every branch, so the appended range is
     * {@code [start, start + len)}.
     *
     * @param <C>   a type that is both an {@link Appendable} and a {@link CharSequence}
     * @param a     the Appendable to append the characters to
     * @param cs    the CharSequence to read characters from
     * @param start the starting index of the subsequence
     * @param len   the number of characters in the subsequence
     * @throws ArithmeticException           If {@code start}, {@code len} or their sum does not fit in an int
     * @throws BufferUnderflowException      If there are not enough characters available in the CharSequence
     * @throws ClosedIllegalStateException   If the resource has been released or closed.
     * @throws BufferOverflowException       If the Appendable cannot accept more characters
     * @throws UnsupportedOperationException If the Appendable is not of type StringBuilder or Bytes
     */
    public static <C extends Appendable & CharSequence> void append(C a, CharSequence cs, @NonNegative long start, @NonNegative long len)
            throws ArithmeticException, BufferUnderflowException, ClosedIllegalStateException, BufferOverflowException {
        if (a instanceof StringBuilder) {
            if (cs instanceof Bytes) {
                ((StringBuilder) a).append(Bytes.toString(((Bytes) cs), start, len));
            } else {
                // subSequence takes an exclusive END index, not a length.
                final int from = Maths.toInt32(start);
                final int to = Math.addExact(from, Maths.toInt32(len));
                ((StringBuilder) a).append(cs.subSequence(from, to));
            }
        } else if (a instanceof Bytes) {
            ((Bytes) a).appendUtf8(cs, Maths.toInt32(start), Maths.toInt32(len));
        } else {
            throw new UnsupportedOperationException();
        }
    }

    /**
     * Calculates the length of a CharSequence in UTF-8 format.
     * <p>
     * Each {@code char} is sized on its own: 1 byte up to U+007F, 2 bytes up to U+07FF and
     * 3 bytes otherwise.
     *
     * @param str the CharSequence to calculate the length of
     * @return the length of the CharSequence in UTF-8
     * @throws IndexOutOfBoundsException If the CharSequence has invalid indices
     */
    public static long findUtf8Length(@NotNull CharSequence str)
            throws IndexOutOfBoundsException {
        int strlen = str.length();
        // Start from one byte per char and add only the extra bytes;
        // charAt is used instead of copying the content to a char array.
        long utflen = strlen;
        for (int i = 0; i < strlen; i++) {
            char c = str.charAt(i);
            if (c <= 0x007F) {
                continue;
            }
            utflen += (c <= 0x07FF) ? 1 : 2;
        }
        return utflen;
    }

    /**
     * Calculates the UTF-8 encoded length of characters stored in a byte array.
     * <p>
     * When {@code coder} is 0 every byte is one character, and bytes above 0x7F need two UTF-8
     * bytes. For any other {@code coder} the array is read as 16-bit characters, low byte first,
     * each sized as 1, 2 or 3 UTF-8 bytes. In that mode an odd-length array fails with an
     * {@link ArrayIndexOutOfBoundsException}.
     * <p>
     * NOTE(review): this presumably mirrors the internal {@code coder} of {@code java.lang.String}
     * on Java 9+ - confirm against the callers.
     *
     * @param bytes the byte array holding the characters
     * @param coder 0 for one byte per character, anything else for two bytes per character
     * @return the number of bytes the content occupies when encoded as UTF-8
     */
    @Java9
    public static long findUtf8Length(byte[] bytes, byte coder) {
        long utflen;

        if (coder == 0) {
            int strlen = bytes.length;
            // One byte per character as a baseline; characters above 0x7F need one more.
            utflen = bytes.length;

            //noinspection ForLoopReplaceableByForEach
            for (int i = 0; i < strlen; i++) {
                int b = (bytes[i] & 0xFF);

                if (b > 0x007F) {
                    utflen++;
                }
            }
        } else {
            int strlen = bytes.length;
            utflen = 0;
            for (int i = 0; i < strlen; i += 2) {
                // Rebuild the 16-bit character: bytes[i] is the low byte, bytes[i + 1] the high byte.
                char c = (char) (((bytes[i + 1] & 0xFF) << 8) | (bytes[i] & 0xFF));

                if (c <= 0x007F) {
                    utflen += 1;
                    continue;
                }
                if (c <= 0x07FF) {
                    utflen += 2;
                } else {
                    utflen += 3;
                }
            }
        }

        return utflen;
    }

    /**
     * Calculates the length, in bytes, of the UTF-8 content held in a byte array.
     * <p>
     * Scanning stops at the first zero byte or at the end of the array. Each lead byte
     * contributes the length of the sequence it announces (4 for {@code >= 0xF0}, 3 for
     * {@code >= 0xE0}, 2 for {@code >= 0xC0}, otherwise 1) and its continuation bytes are skipped
     * without validation. A sequence truncated by the end of the array is still counted at its
     * announced length.
     *
     * @param chars the byte array holding UTF-8 encoded content
     * @return the number of bytes covered by the UTF-8 sequences found
     */
    @Java9
    public static long findUtf8Length(byte[] chars) {
        int strlen = chars.length;
        // Start from zero: each sequence adds its own full byte count below.
        long utflen = 0;
        for (int i = 0; i < strlen; i++) {
            int c = chars[i] & 0xFF; // unsigned byte

            if (c == 0) { // we have hit end of string
                break;
            }

            if (c >= 0xF0) {
                utflen += 4;
                i += 3;
            } else if (c >= 0xE0) {
                utflen += 3;
                i += 2;
            } else if (c >= 0xC0) {
                utflen += 2;
                i += 1;
            } else {
                utflen += 1;
            }
        }
        return utflen;
    }

    /**
     * Calculates the length of a character array in UTF-8 format, from a given offset up to the specified length.
     * <p>
     * Each {@code char} is sized on its own: 1 byte up to U+007F, 2 bytes up to U+07FF and
     * 3 bytes otherwise.
     *
     * @param chars  the character array to calculate the length of
     * @param offset the starting index in the character array
     * @param length the number of characters to include in the calculation
     * @return the length of the specified segment of the character array in UTF-8
     */
    public static long findUtf8Length(char[] chars, @NonNegative int offset, @NonNegative int length) {
        requireNonNull(chars);
        // One byte per char as a baseline; only the extra bytes are added in the loop.
        long utflen = length;
        for (int i = offset, end = offset + length; i < end; i++) {
            char c = chars[i];
            if (c <= 0x007F) {
                continue;
            }
            if (c <= 0x07FF) {
                utflen++;
            } else {
                utflen += 2;
            }
        }
        return utflen;
    }

    /**
     * Calculates the length of a character array in UTF-8 format.
     *
     * @param chars the character array to calculate the length of
     * @return the length of the character array in UTF-8
     */
    public static long findUtf8Length(char[] chars) {
        return findUtf8Length(chars, 0, chars.length);
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy