All Downloads are FREE. Search and download functionalities are using the official Maven repository.

cn.wjybxx.dson.internal.Utf8Util Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2023-2024 wjybxx([email protected])
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package cn.wjybxx.dson.internal;

/**
 * 不建议直接使用,若确实想使用该算法,可考虑拷贝代码
 *
 * @author wjybxx
 * date - 2023/12/16
 */
public class Utf8Util {

    public static int utf8Length(final String str) {
        int utf16Length = str.length();

        int i = 0;
        for (; i < utf16Length && str.charAt(i) < 0x80; i++) {

        }
        if (i == utf16Length) {
            return i;
        }

        int total = i;
        while (i < utf16Length) {
            int c = Character.codePointAt(str, i);
            if (c < 0x80) {
                total += 1;
            } else if (c < 0x800) {
                total += 2;
            } else if (c < 0x10000) {
                total += 3;
            } else {
                total += 4;
            }
            i += Character.charCount(c);
        }
        return total;
    }

    /**
     * utf8编码
     *
     * @param str       要编码的字符串
     * @param outBuffer buffer
     * @param offset    buffer的偏移
     * @param length    buffer的可用长度
     * @return 编码的字节数
     */
    public static int utf8Encode(final String str, byte[] outBuffer, int offset, int length) {
        final int utf16Length = str.length();
        final int limit = offset + length;

        // 单字节字符串优化 -- ASCII码字符优化
        int i = 0;
        int j = offset;
        for (char c; i < utf16Length && (j + i < limit) && (c = str.charAt(i)) < 0x80; i++) {
            outBuffer[j++] = (byte) c;
        }
        if (i == utf16Length) {
            return i;
        }

        int total = i;
        while (i < utf16Length) {
            int c = Character.codePointAt(str, i);
            if (c < 0x80 && (j < limit)) {
                outBuffer[j++] = (byte) c;
                total += 1;
            } else if (c < 0x800 && (j + 2 <= limit)) {
                outBuffer[j++] = ((byte) (0xc0 + (c >> 6)));
                outBuffer[j++] = ((byte) (0x80 + (c & 0x3f)));
                total += 2;
            } else if (c < 0x10000 && (j + 3 <= limit)) {
                // 最大单字符码位为0xFFFF, 16位,3个UTF-8字节
                outBuffer[j++] = ((byte) (0xe0 + (c >> 12)));
                outBuffer[j++] = ((byte) (0x80 + ((c >> 6) & 0x3f)));
                outBuffer[j++] = ((byte) (0x80 + (c & 0x3f)));
                total += 3;
            } else {
                // 可能是由于空间不够进入到这里
                if (c < 0x10000 || (j + 4 > limit)) {
                    throw new ArrayIndexOutOfBoundsException("Failed writing " + c + " at index " + j);
                }
                outBuffer[j++] = ((byte) (0xf0 + (c >> 18)));
                outBuffer[j++] = ((byte) (0x80 + ((c >> 12) & 0x3f)));
                outBuffer[j++] = ((byte) (0x80 + ((c >> 6) & 0x3f)));
                outBuffer[j++] = ((byte) (0x80 + (c & 0x3f)));
                total += 4;
            }
            i += Character.charCount(c);
        }
        return total;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy