// com.facebook.presto.jdbc.internal.okio.Utf8 Maven / Gradle / Ivy
/*
* Copyright (C) 2017 Square, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.jdbc.internal.okio;
/**
* Okio assumes most applications use UTF-8 exclusively, and offers optimized implementations of
* common operations on UTF-8 strings.
*
* <table>
* <caption>UTF-8 operations by type</caption>
* <tr>
* <th></th>
* <th>{@link ByteString}</th>
* <th>{@link Buffer}, {@link BufferedSink}, {@link BufferedSource}</th>
* </tr>
* <tr>
* <td>Encode a string</td>
* <td>{@link ByteString#encodeUtf8(String)}</td>
* <td>{@link BufferedSink#writeUtf8(String)}</td>
* </tr>
* <tr>
* <td>Encode a code point</td>
* <td></td>
* <td>{@link BufferedSink#writeUtf8CodePoint(int)}</td>
* </tr>
* <tr>
* <td>Decode a string</td>
* <td>{@link ByteString#utf8()}</td>
* <td>{@link BufferedSource#readUtf8()}, {@link BufferedSource#readUtf8(long)}</td>
* </tr>
* <tr>
* <td>Decode a code point</td>
* <td></td>
* <td>{@link BufferedSource#readUtf8CodePoint()}</td>
* </tr>
* <tr>
* <td>Decode until the next {@code \r\n} or {@code \n}</td>
* <td></td>
* <td>{@link BufferedSource#readUtf8LineStrict()},
* {@link BufferedSource#readUtf8LineStrict(long)}</td>
* </tr>
* <tr>
* <td>Decode until the next {@code \r\n}, {@code \n}, or {@code EOF}</td>
* <td></td>
* <td>{@link BufferedSource#readUtf8Line()}</td>
* </tr>
* <tr>
* <td>Measure the bytes in a UTF-8 string</td>
* <td colspan="2">{@link Utf8#size}, {@link Utf8#size(String, int, int)}</td>
* </tr>
* </table>
*/
public final class Utf8 {
  private Utf8() {
    // Static utility holder; never instantiated.
  }

  /**
   * Returns the number of bytes used to encode {@code string} as UTF-8 when using {@link
   * ByteString#encodeUtf8} or {@link Buffer#writeUtf8(String)}.
   *
   * @param string the text to measure; must not be null
   * @return the encoded length in bytes
   * @throws IllegalArgumentException if {@code string} is null
   */
  public static long size(String string) {
    // Validate before calling string.length() so that a null argument is reported with the same
    // exception type and message as the three-argument overload, rather than a bare
    // NullPointerException.
    if (string == null) {
      throw new IllegalArgumentException("string == null");
    }
    return size(string, 0, string.length());
  }

  /**
   * Returns the number of bytes used to encode the slice of {@code string} as UTF-8 when using
   * {@link BufferedSink#writeUtf8(String, int, int)}.
   *
   * <p>A surrogate that is not part of a well-formed high/low pair inside the slice is counted
   * as a single byte (the {@code '?'} replacement).
   *
   * @param string the text to measure; must not be null
   * @param beginIndex index of the first {@code char} to measure, inclusive
   * @param endIndex index just past the last {@code char} to measure, exclusive
   * @return the encoded length in bytes of {@code string[beginIndex, endIndex)}
   * @throws IllegalArgumentException if {@code string} is null, {@code beginIndex} is negative,
   *     {@code endIndex} is less than {@code beginIndex}, or {@code endIndex} exceeds
   *     {@code string.length()}
   */
  public static long size(String string, int beginIndex, int endIndex) {
    if (string == null) {
      throw new IllegalArgumentException("string == null");
    }
    if (beginIndex < 0) {
      throw new IllegalArgumentException("beginIndex < 0: " + beginIndex);
    }
    if (endIndex < beginIndex) {
      throw new IllegalArgumentException("endIndex < beginIndex: " + endIndex + " < " + beginIndex);
    }
    if (endIndex > string.length()) {
      throw new IllegalArgumentException(
          "endIndex > string.length: " + endIndex + " > " + string.length());
    }

    long result = 0;
    for (int i = beginIndex; i < endIndex;) {
      int c = string.charAt(i);

      if (c < 0x80) {
        // A 7-bit character with 1 byte.
        result++;
        i++;

      } else if (c < 0x800) {
        // An 11-bit character with 2 bytes.
        result += 2;
        i++;

      } else if (c < 0xd800 || c > 0xdfff) {
        // A 16-bit character with 3 bytes.
        result += 3;
        i++;

      } else {
        // c is a surrogate. Peek at the next char, but only within the slice: a pair that
        // straddles endIndex is treated as malformed (low == 0 fails the range check below).
        int low = i + 1 < endIndex ? string.charAt(i + 1) : 0;
        if (c > 0xdbff || low < 0xdc00 || low > 0xdfff) {
          // A malformed surrogate, which yields '?'. Advance by one char only so that the
          // following char is examined on its own.
          result++;
          i++;
        } else {
          // A 21-bit character with 4 bytes, consuming both halves of the pair.
          result += 4;
          i += 2;
        }
      }
    }

    return result;
  }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy