All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.fluxtion.agrona.AsciiEncoding Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2014-2024 Real Logic Limited.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.fluxtion.agrona;

import java.math.BigInteger;

import static java.nio.charset.StandardCharsets.US_ASCII;

/**
 * Helper for dealing with ASCII encoding of numbers.
 */
public final class AsciiEncoding
{
    /**
     * Maximum number of digits in a US-ASCII-encoded int.
     */
    public static final int INT_MAX_DIGITS = 10;

    /**
     * Maximum number of digits in a US-ASCII-encoded long.
     */
    public static final int LONG_MAX_DIGITS = 19;

    /**
     * An absolute value of the {@link Integer#MIN_VALUE} as long.
     */
    public static final long INTEGER_ABSOLUTE_MIN_VALUE = Math.abs((long)Integer.MIN_VALUE);

    /**
     * US-ASCII-encoded byte representation of the {@link Integer#MIN_VALUE}.
     */
    public static final byte[] MIN_INTEGER_VALUE = String.valueOf(Integer.MIN_VALUE).getBytes(US_ASCII);

    /**
     * US-ASCII-encoded byte representation of the {@link Integer#MAX_VALUE}.
     */
    public static final byte[] MAX_INTEGER_VALUE = String.valueOf(Integer.MAX_VALUE).getBytes(US_ASCII);

    /**
     * US-ASCII-encoded byte representation of the {@link Long#MIN_VALUE}.
     */
    public static final byte[] MIN_LONG_VALUE = String.valueOf(Long.MIN_VALUE).getBytes(US_ASCII);

    /**
     * US-ASCII-encoded byte representation of the {@link Long#MAX_VALUE}.
     */
    public static final byte[] MAX_LONG_VALUE = String.valueOf(Long.MAX_VALUE).getBytes(US_ASCII);

    /**
     * Byte value of the minus sign ('{@code -}').
     */
    public static final byte MINUS_SIGN = '-';

    /**
     * Byte value of zero character ('{@code 0}').
     */
    public static final byte ZERO = '0';

    /**
     * Lookup table used for encoding ints/longs as ASCII characters.
     */
    public static final byte[] ASCII_DIGITS = new byte[]
    {
        '0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6', '0', '7', '0', '8', '0', '9',
        '1', '0', '1', '1', '1', '2', '1', '3', '1', '4', '1', '5', '1', '6', '1', '7', '1', '8', '1', '9',
        '2', '0', '2', '1', '2', '2', '2', '3', '2', '4', '2', '5', '2', '6', '2', '7', '2', '8', '2', '9',
        '3', '0', '3', '1', '3', '2', '3', '3', '3', '4', '3', '5', '3', '6', '3', '7', '3', '8', '3', '9',
        '4', '0', '4', '1', '4', '2', '4', '3', '4', '4', '4', '5', '4', '6', '4', '7', '4', '8', '4', '9',
        '5', '0', '5', '1', '5', '2', '5', '3', '5', '4', '5', '5', '5', '6', '5', '7', '5', '8', '5', '9',
        '6', '0', '6', '1', '6', '2', '6', '3', '6', '4', '6', '5', '6', '6', '6', '7', '6', '8', '6', '9',
        '7', '0', '7', '1', '7', '2', '7', '3', '7', '4', '7', '5', '7', '6', '7', '7', '7', '8', '7', '9',
        '8', '0', '8', '1', '8', '2', '8', '3', '8', '4', '8', '5', '8', '6', '8', '7', '8', '8', '8', '9',
        '9', '0', '9', '1', '9', '2', '9', '3', '9', '4', '9', '5', '9', '6', '9', '7', '9', '8', '9', '9'
    };

    /**
     * {@link Long#MAX_VALUE} split into components by 8 digits max.
     */
    public static final int[] LONG_MAX_VALUE_DIGITS = new int[]{ 92233720, 36854775, 807 };

    /**
     * {@link Long#MIN_VALUE} split into components by 8 digits max.
     */
    public static final int[] LONG_MIN_VALUE_DIGITS = new int[]{ 92233720, 36854775, 808 };

    /**
     * Power of ten for int values.
     */
    public static final int[] INT_POW_10 =
    {
        1, 10, 100, 1_000, 10_000, 100_000, 1_000_000, 10_000_000, 100_000_000, 1_000_000_000
    };

    /**
     * Power of ten for long values.
     */
    public static final long[] LONG_POW_10 =
    {
        1L, 10L, 100L, 1_000L, 10_000L, 100_000L, 1_000_000L, 10_000_000L, 100_000_000L, 1_000_000_000L,
        10_000_000_000L, 100_000_000_000L, 1_000_000_000_000L, 10_000_000_000_000L, 100_000_000_000_000L,
        1_000_000_000_000_000L, 10_000_000_000_000_000L, 100_000_000_000_000_000L, 1_000_000_000_000_000_000L
    };

    private static final long[] INT_DIGITS = new long[32];

    private static final long[] LONG_DIGITS = new long[64];

    static
    {
        for (int i = 1; i < 33; i++)
        {
            final int smallest = 1 << (i - 1);
            final long smallestLog10 = (long)Math.ceil(Math.log10(smallest) / Math.log10(10));
            if (1 == i)
            {
                INT_DIGITS[i - 1] = 1L << 32;
            }
            else if (i < 31)
            {
                INT_DIGITS[i - 1] = (1L << 32) - LONG_POW_10[(int)smallestLog10] + (smallestLog10 << 32);
            }
            else
            {
                INT_DIGITS[i - 1] = smallestLog10 << 32;
            }
        }

        final BigInteger tenToNineteen = BigInteger.TEN.pow(19);
        for (int i = 0; i < 64; i++)
        {
            if (0 == i)
            {
                LONG_DIGITS[i] = 1L << 52;
            }
            else
            {
                final int upper = ((i * 1262611) >> 22) + 1;
                final long correction = upper < LONG_MAX_DIGITS ? LONG_POW_10[upper] >> (i >> 2) :
                    tenToNineteen.shiftRight(i >> 2).longValueExact();
                final long value = ((long)(upper + 1) << 52) - correction;
                LONG_DIGITS[i] = value;
            }
        }
    }

    private AsciiEncoding()
    {
    }

    /**
     * Calling this method is equivalent of doing:
     * 
     * {@code digitCount(value) - 1}
     * 
* * @param value to find the end encoded character offset. * @return the offset at which the encoded value will end. * @see #digitCount(int) * @deprecated Use {@link #digitCount(int)} instead. */ @Deprecated public static int endOffset(final int value) { return digitCount(value) - 1; } /** * Calling this method is equivalent of doing: *
     * {@code digitCount(value) - 1}
     * 
* * @param value to find the end encoded character offset. * @return the offset at which the encoded value will end. * @see #digitCount(long) * @deprecated Use {@link #digitCount(long)} instead. */ @Deprecated public static int endOffset(final long value) { return digitCount(value) - 1; } /** * Count number of digits in a positive {@code int} value. * *

Implementation is based on the Kendall Willets' idea as presented in the * Computing the number of digits of an integer even faster blog post. * *

* Use {@code org.agrona.AsciiEncodingTest#printDigitCountIntTable()} to regenerate lookup table. * * @param value to count number of digits int. * @return number of digits in a number, e.g. if input value is {@code 123} then the result will be {@code 3}. */ public static int digitCount(final int value) { return (int)((value + INT_DIGITS[31 - Integer.numberOfLeadingZeros(value | 1)]) >> 32); } /** * Count number of digits in a positive {@code long} value. * *

Implementation is based on the Kendall Willets' idea as presented in the * Computing the number of digits of an integer even faster blog post. * *

* Use {@code org.agrona.AsciiEncodingTest#printDigitCountLongTable()} to regenerate lookup table. * * @param value to count number of digits int. * @return number of digits in a number, e.g. if input value is {@code 12345678909876} then the result will be * {@code 14}. */ public static int digitCount(final long value) { final int floorLog2 = 63 ^ Long.numberOfLeadingZeros(value | 1); return (int)((LONG_DIGITS[floorLog2] + (value >> (floorLog2 >> 2))) >> 52); } /** * Check if the {@code value} is an ASCII-encoded digit. * * @param value ti be checked. * @return {@code true} if the {@code value} is an ASCII-encoded digit. */ public static boolean isDigit(final byte value) { return value >= 0x30 && value <= 0x39; } /** * Get the digit value of an ASCII encoded {@code byte}. * * @param index within the string the value is encoded. * @param value of the encoding in ASCII. * @return the digit value of the encoded ASCII. * @throws AsciiNumberFormatException if the value is not a digit. */ public static int getDigit(final int index, final byte value) { if (value < 0x30 || value > 0x39) { throw new AsciiNumberFormatException("'" + ((char)value) + "' is not a valid digit @ " + index); } return value - 0x30; } /** * Get the digit value of an ASCII encoded {@code char}. * * @param index within the string the value is encoded. * @param value of the encoding in ASCII. * @return the digit value of the encoded ASCII. * @throws AsciiNumberFormatException if the value is not a digit. */ public static int getDigit(final int index, final char value) { if (value < 0x30 || value > 0x39) { throw new AsciiNumberFormatException("'" + value + "' is not a valid digit @ " + index); } return value - 0x30; } /** * Parse an ASCII encoded int from a {@link CharSequence}. * * @param cs to parse. * @param index at which the number begins. * @param length of the encoded number in characters. * @return the parsed value. * @throws AsciiNumberFormatException if {@code length <= 0} or {@code cs} is not an int value */ public static int parseIntAscii(final CharSequence cs, final int index, final int length) { if (length <= 0) { throw new AsciiNumberFormatException("empty string: index=" + index + " length=" + length); } final boolean negative = MINUS_SIGN == cs.charAt(index); int i = index; if (negative) { i++; if (1 == length) { throwParseIntError(cs, index, length); } } final int end = index + length; if (end - i < INT_MAX_DIGITS) { final int tally = parsePositiveIntAscii(cs, index, length, i, end); return negative ? -tally : tally; } else { final long tally = parsePositiveIntAsciiOverflowCheck(cs, index, length, i, end); if (tally > INTEGER_ABSOLUTE_MIN_VALUE || INTEGER_ABSOLUTE_MIN_VALUE == tally && !negative) { throwParseIntOverflowError(cs, index, length); } return (int)(negative ? -tally : tally); } } /** * Parse an ASCII encoded long from a {@link CharSequence}. * * @param cs to parse. * @param index at which the number begins. * @param length of the encoded number in characters. * @return the parsed value. * @throws AsciiNumberFormatException if {@code length <= 0} or {@code cs} is not a long value */ public static long parseLongAscii(final CharSequence cs, final int index, final int length) { if (length <= 0) { throw new AsciiNumberFormatException("empty string: index=" + index + " length=" + length); } final boolean negative = MINUS_SIGN == cs.charAt(index); int i = index; if (negative) { i++; if (1 == length) { throwParseLongError(cs, index, length); } } final int end = index + length; if (end - i < LONG_MAX_DIGITS) { final long tally = parsePositiveLongAscii(cs, index, length, i, end); return negative ? -tally : tally; } else if (negative) { return -parseLongAsciiOverflowCheck(cs, index, length, LONG_MIN_VALUE_DIGITS, i, end); } else { return parseLongAsciiOverflowCheck(cs, index, length, LONG_MAX_VALUE_DIGITS, i, end); } } /** * Checks if the provided {@code value} represents an ASCII-encoded number which contains exactly four digits. * * @param value four ASCII-encoded bytes to check. * @return {@code true} if the {@code value} is an ASCII-encoded number with four digits in it. */ public static boolean isFourDigitsAsciiEncodedNumber(final int value) { return 0 == ((((value + 0x46464646) | (value - 0x30303030)) & 0x80808080)); } /** * Parses a four-digit number out of an ASCII-encoded value assuming little-endian byte order. * * @param bytes ASCII-encoded value in little-endian byte order. * @return {@code int} value with four digits. */ public static int parseFourDigitsLittleEndian(final int bytes) { int val = bytes & 0x0F0F0F0F; val = (val * 10) + (val >> 8); return ((val & 0x00FF00FF) * 6553601) >> 16; } /** * Checks if the provided {@code value} represents an ASCII-encoded number which contains exactly eight digits. * * @param value eoght ASCII-encoded bytes to check. * @return {@code true} if the {@code value} is an ASCII-encoded number with eight digits in it. */ public static boolean isEightDigitAsciiEncodedNumber(final long value) { return 0L == ((((value + 0x4646464646464646L) | (value - 0x3030303030303030L)) & 0x8080808080808080L)); } /** * Parses an eight-digit number out of an ASCII-encoded value assuming little-endian byte order. * * @param bytes ASCII-encoded value in little-endian byte order. * @return {@code int} value with eight digits. */ public static int parseEightDigitsLittleEndian(final long bytes) { long val = bytes - 0x3030303030303030L; val = (val * 10) + (val >> 8); val = (((val & 0x000000FF000000FFL) * 0x000F424000000064L) + (((val >> 16) & 0x000000FF000000FFL) * 0x0000271000000001L)) >> 32; return (int)val; } private static int parsePositiveIntAscii( final CharSequence cs, final int index, final int length, final int startIndex, final int end) { int i = startIndex; int tally = 0, quartet; while ((end - i) >= 4 && isFourDigitsAsciiEncodedNumber(quartet = readFourBytesLittleEndian(cs, i))) { tally = (tally * 10_000) + parseFourDigitsLittleEndian(quartet); i += 4; } byte digit; while (i < end && isDigit(digit = (byte)cs.charAt(i))) { tally = (tally * 10) + (digit - 0x30); i++; } if (i != end) { throwParseIntError(cs, index, length); } return tally; } private static long parsePositiveIntAsciiOverflowCheck( final CharSequence cs, final int index, final int length, final int startIndex, final int end) { if ((end - startIndex) > INT_MAX_DIGITS) { throwParseIntOverflowError(cs, index, length); } int i = startIndex; long tally = 0; final long octet = readEightBytesLittleEndian(cs, i); if (isEightDigitAsciiEncodedNumber(octet)) { tally = parseEightDigitsLittleEndian(octet); i += 8; byte digit; while (i < end && isDigit(digit = (byte)cs.charAt(i))) { tally = (tally * 10L) + (digit - 0x30); i++; } } if (i != end) { throwParseIntError(cs, index, length); } return tally; } private static void throwParseIntError(final CharSequence cs, final int index, final int length) { throw new AsciiNumberFormatException("error parsing int: " + cs.subSequence(index, index + length)); } private static void throwParseIntOverflowError(final CharSequence cs, final int index, final int length) { throw new AsciiNumberFormatException("int overflow parsing: " + cs.subSequence(index, index + length)); } private static long parsePositiveLongAscii( final CharSequence cs, final int index, final int length, final int startIndex, final int end) { int i = startIndex; long tally = 0, octet; while ((end - i) >= 8 && isEightDigitAsciiEncodedNumber(octet = readEightBytesLittleEndian(cs, i))) { tally = (tally * 100_000_000L) + parseEightDigitsLittleEndian(octet); i += 8; } int quartet; while ((end - i) >= 4 && isFourDigitsAsciiEncodedNumber(quartet = readFourBytesLittleEndian(cs, i))) { tally = (tally * 10_000L) + parseFourDigitsLittleEndian(quartet); i += 4; } byte digit; while (i < end && isDigit(digit = (byte)cs.charAt(i))) { tally = (tally * 10) + (digit - 0x30); i++; } if (i != end) { throwParseLongError(cs, index, length); } return tally; } private static long parseLongAsciiOverflowCheck( final CharSequence cs, final int index, final int length, final int[] maxValue, final int startIndex, final int end) { if ((end - startIndex) > LONG_MAX_DIGITS) { throwParseLongOverflowError(cs, index, length); } int i = startIndex, k = 0; boolean checkOverflow = true; long tally = 0, octet; while ((end - i) >= 8 && isEightDigitAsciiEncodedNumber(octet = readEightBytesLittleEndian(cs, i))) { final int eightDigits = parseEightDigitsLittleEndian(octet); if (checkOverflow) { if (eightDigits > maxValue[k]) { throwParseLongOverflowError(cs, index, length); } else if (eightDigits < maxValue[k]) { checkOverflow = false; } k++; } tally = (tally * 100_000_000L) + eightDigits; i += 8; } byte digit; int lastDigits = 0; while (i < end && isDigit(digit = (byte)cs.charAt(i))) { lastDigits = (lastDigits * 10) + (digit - 0x30); i++; } if (i != end) { throwParseLongError(cs, index, length); } else if (checkOverflow && lastDigits > maxValue[k]) { throwParseLongOverflowError(cs, index, length); } return (tally * 1000L) + lastDigits; } private static void throwParseLongError(final CharSequence cs, final int index, final int length) { throw new AsciiNumberFormatException("error parsing long: " + cs.subSequence(index, index + length)); } private static void throwParseLongOverflowError(final CharSequence cs, final int index, final int length) { throw new AsciiNumberFormatException("long overflow parsing: " + cs.subSequence(index, index + length)); } private static int readFourBytesLittleEndian(final CharSequence cs, final int index) { return cs.charAt(index + 3) << 24 | cs.charAt(index + 2) << 16 | cs.charAt(index + 1) << 8 | cs.charAt(index); } private static long readEightBytesLittleEndian(final CharSequence cs, final int index) { return (long)cs.charAt(index + 7) << 56 | (long)cs.charAt(index + 6) << 48 | (long)cs.charAt(index + 5) << 40 | (long)cs.charAt(index + 4) << 32 | (long)cs.charAt(index + 3) << 24 | (long)cs.charAt(index + 2) << 16 | cs.charAt(index + 1) << 8 | cs.charAt(index); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy