All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.dinky.shaded.paimon.utils.BinaryStringUtils Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.	See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.	You may obtain a copy of the License at
 *
 *		http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.dinky.shaded.paimon.utils;

import org.dinky.shaded.paimon.data.BinaryString;
import org.dinky.shaded.paimon.data.Timestamp;
import org.dinky.shaded.paimon.memory.MemorySegmentUtils;
import org.dinky.shaded.paimon.types.DataType;
import org.dinky.shaded.paimon.types.DataTypeChecks;

import java.time.DateTimeException;
import java.util.Arrays;
import java.util.List;
import java.util.TimeZone;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import static org.dinky.shaded.paimon.data.BinaryString.fromString;
import static org.dinky.shaded.paimon.types.DataTypeRoot.BINARY;
import static org.dinky.shaded.paimon.types.DataTypeRoot.CHAR;

/** Util for {@link BinaryString}. */
public class BinaryStringUtils {

    public static final BinaryString NULL_STRING = fromString("NULL");
    public static final BinaryString TRUE_STRING = fromString("TRUE");
    public static final BinaryString FALSE_STRING = fromString("FALSE");

    public static final BinaryString[] EMPTY_STRING_ARRAY = new BinaryString[0];
    private static final List TRUE_STRINGS =
            Stream.of("t", "true", "y", "yes", "1")
                    .map(BinaryString::fromString)
                    .collect(Collectors.toList());

    private static final List FALSE_STRINGS =
            Stream.of("f", "false", "n", "no", "0")
                    .map(BinaryString::fromString)
                    .collect(Collectors.toList());

    private static byte[] getTmpBytes(BinaryString str, int sizeInBytes) {
        byte[] bytes = MemorySegmentUtils.allocateReuseBytes(sizeInBytes);
        MemorySegmentUtils.copyToBytes(str.getSegments(), str.getOffset(), bytes, 0, sizeInBytes);
        return bytes;
    }

    /** Parse a {@link BinaryString} to boolean. */
    public static boolean toBoolean(BinaryString str) {
        BinaryString lowerCase = str.toLowerCase();
        if (TRUE_STRINGS.contains(lowerCase)) {
            return true;
        }
        if (FALSE_STRINGS.contains(lowerCase)) {
            return false;
        }
        throw new RuntimeException("Cannot parse '" + str + "' as BOOLEAN.");
    }

    /**
     * Parses this BinaryString to Long.
     *
     * 

Note that, in this method we accumulate the result in negative format, and convert it to * positive format at the end, if this string is not started with '-'. This is because min value * is bigger than max value in digits, e.g. Long.MAX_VALUE is '9223372036854775807' and * Long.MIN_VALUE is '-9223372036854775808'. * *

This code is mostly copied from LazyLong.parseLong in Hive. */ public static long toLong(BinaryString str) throws NumberFormatException { int sizeInBytes = str.getSizeInBytes(); byte[] tmpBytes = getTmpBytes(str, sizeInBytes); if (sizeInBytes == 0) { throw numberFormatExceptionFor(str, "Input is empty."); } int i = 0; byte b = tmpBytes[i]; final boolean negative = b == '-'; if (negative || b == '+') { i++; if (sizeInBytes == 1) { throw numberFormatExceptionFor(str, "Input has only positive or negative symbol."); } } long result = 0; final byte separator = '.'; final int radix = 10; final long stopValue = Long.MIN_VALUE / radix; while (i < sizeInBytes) { b = tmpBytes[i]; i++; if (b == separator) { // We allow decimals and will return a truncated integral in that case. // Therefore we won't throw an exception here (checking the fractional // part happens below.) break; } int digit; if (b >= '0' && b <= '9') { digit = b - '0'; } else { throw numberFormatExceptionFor(str, "Invalid character found."); } // We are going to process the new digit and accumulate the result. However, before // doing this, if the result is already smaller than the // stopValue(Long.MIN_VALUE / radix), then result * 10 will definitely be smaller // than minValue, and we can stop. if (result < stopValue) { throw numberFormatExceptionFor(str, "Overflow."); } result = result * radix - digit; // Since the previous result is less than or equal to // stopValue(Long.MIN_VALUE / radix), we can just use `result > 0` to check overflow. // If result overflows, we should stop. if (result > 0) { throw numberFormatExceptionFor(str, "Overflow."); } } // This is the case when we've encountered a decimal separator. The fractional // part will not change the number, but we will verify that the fractional part // is well formed. while (i < sizeInBytes) { byte currentByte = tmpBytes[i]; if (currentByte < '0' || currentByte > '9') { throw numberFormatExceptionFor(str, "Invalid character found."); } i++; } if (!negative) { result = -result; if (result < 0) { throw numberFormatExceptionFor(str, "Overflow."); } } return result; } /** * Parses this BinaryString to Int. * *

Note that, in this method we accumulate the result in negative format, and convert it to * positive format at the end, if this string is not started with '-'. This is because min value * is bigger than max value in digits, e.g. Integer.MAX_VALUE is '2147483647' and * Integer.MIN_VALUE is '-2147483648'. * *

This code is mostly copied from LazyInt.parseInt in Hive. * *

Note that, this method is almost same as `toLong`, but we leave it duplicated for * performance reasons, like Hive does. */ public static int toInt(BinaryString str) throws NumberFormatException { int sizeInBytes = str.getSizeInBytes(); byte[] tmpBytes = getTmpBytes(str, sizeInBytes); if (sizeInBytes == 0) { throw numberFormatExceptionFor(str, "Input is empty."); } int i = 0; byte b = tmpBytes[i]; final boolean negative = b == '-'; if (negative || b == '+') { i++; if (sizeInBytes == 1) { throw numberFormatExceptionFor(str, "Input has only positive or negative symbol."); } } int result = 0; final byte separator = '.'; final int radix = 10; final long stopValue = Integer.MIN_VALUE / radix; while (i < sizeInBytes) { b = tmpBytes[i]; i++; if (b == separator) { // We allow decimals and will return a truncated integral in that case. // Therefore we won't throw an exception here (checking the fractional // part happens below.) break; } int digit; if (b >= '0' && b <= '9') { digit = b - '0'; } else { throw numberFormatExceptionFor(str, "Invalid character found."); } // We are going to process the new digit and accumulate the result. However, before // doing this, if the result is already smaller than the // stopValue(Long.MIN_VALUE / radix), then result * 10 will definitely be smaller // than minValue, and we can stop. if (result < stopValue) { throw numberFormatExceptionFor(str, "Overflow."); } result = result * radix - digit; // Since the previous result is less than or equal to // stopValue(Long.MIN_VALUE / radix), we can just use `result > 0` to check overflow. // If result overflows, we should stop. if (result > 0) { throw numberFormatExceptionFor(str, "Overflow."); } } // This is the case when we've encountered a decimal separator. The fractional // part will not change the number, but we will verify that the fractional part // is well formed. while (i < sizeInBytes) { byte currentByte = tmpBytes[i]; if (currentByte < '0' || currentByte > '9') { throw numberFormatExceptionFor(str, "Invalid character found."); } i++; } if (!negative) { result = -result; if (result < 0) { throw numberFormatExceptionFor(str, "Overflow."); } } return result; } public static short toShort(BinaryString str) throws NumberFormatException { int intValue = toInt(str); short result = (short) intValue; if (result == intValue) { return result; } throw numberFormatExceptionFor(str, "Overflow."); } public static byte toByte(BinaryString str) throws NumberFormatException { int intValue = toInt(str); byte result = (byte) intValue; if (result == intValue) { return result; } throw numberFormatExceptionFor(str, "Overflow."); } public static double toDouble(BinaryString str) throws NumberFormatException { return Double.parseDouble(str.toString()); } public static float toFloat(BinaryString str) throws NumberFormatException { return Float.parseFloat(str.toString()); } private static NumberFormatException numberFormatExceptionFor( BinaryString input, String reason) { return new NumberFormatException("For input string: '" + input + "'. " + reason); } public static int toDate(BinaryString input) throws DateTimeException { Integer date = DateTimeUtils.parseDate(input.toString()); if (date == null) { throw new DateTimeException("For input string: '" + input + "'."); } return date; } public static int toTime(BinaryString input) throws DateTimeException { Integer date = DateTimeUtils.parseTime(input.toString()); if (date == null) { throw new DateTimeException("For input string: '" + input + "'."); } return date; } /** Used by {@code CAST(x as TIMESTAMP)}. */ public static Timestamp toTimestamp(BinaryString input, int precision) throws DateTimeException { return DateTimeUtils.parseTimestampData(input.toString(), precision); } /** Used by {@code CAST(x as TIMESTAMP_LTZ)}. */ public static Timestamp toTimestamp(BinaryString input, int precision, TimeZone timeZone) throws DateTimeException { return DateTimeUtils.parseTimestampData(input.toString(), precision, timeZone); } public static BinaryString toCharacterString(BinaryString strData, DataType type) { final boolean targetCharType = type.getTypeRoot() == CHAR; final int targetLength = DataTypeChecks.getLength(type); if (strData.numChars() > targetLength) { return strData.substring(0, targetLength); } else if (strData.numChars() < targetLength && targetCharType) { int padLength = targetLength - strData.numChars(); BinaryString padString = BinaryString.blankString(padLength); return StringUtils.concat(strData, padString); } return strData; } public static byte[] toBinaryString(byte[] byteArrayTerm, DataType type) { final boolean targetBinaryType = type.getTypeRoot() == BINARY; final int targetLength = DataTypeChecks.getLength(type); if (byteArrayTerm.length == targetLength) { return byteArrayTerm; } if (targetBinaryType) { return Arrays.copyOf(byteArrayTerm, targetLength); } else { if (byteArrayTerm.length <= targetLength) { return byteArrayTerm; } else { return Arrays.copyOf(byteArrayTerm, targetLength); } } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy