All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.facebook.presto.jdbc.internal.okio.Utf8 Maven / Gradle / Ivy

/*
 * Copyright (C) 2017 Square, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.presto.jdbc.internal.okio;

/**
 * Okio assumes most applications use UTF-8 exclusively, and offers optimized implementations of
 * common operations on UTF-8 strings.
 *
 * <table border="1" cellspacing="0" cellpadding="3" summary="UTF-8 operations by type">
 * <tr>
 *   <th></th>
 *   <th>{@link ByteString}</th>
 *   <th>{@link Buffer}, {@link BufferedSink}, {@link BufferedSource}</th>
 * </tr>
 * <tr>
 *   <td>Encode a string</td>
 *   <td>{@link ByteString#encodeUtf8(String)}</td>
 *   <td>{@link BufferedSink#writeUtf8(String)}</td>
 * </tr>
 * <tr>
 *   <td>Encode a code point</td>
 *   <td></td>
 *   <td>{@link BufferedSink#writeUtf8CodePoint(int)}</td>
 * </tr>
 * <tr>
 *   <td>Decode a string</td>
 *   <td>{@link ByteString#utf8()}</td>
 *   <td>{@link BufferedSource#readUtf8()}, {@link BufferedSource#readUtf8(long)}</td>
 * </tr>
 * <tr>
 *   <td>Decode a code point</td>
 *   <td></td>
 *   <td>{@link BufferedSource#readUtf8CodePoint()}</td>
 * </tr>
 * <tr>
 *   <td>Decode until the next {@code \r\n} or {@code \n}</td>
 *   <td></td>
 *   <td>{@link BufferedSource#readUtf8LineStrict()},
 *       {@link BufferedSource#readUtf8LineStrict(long)}</td>
 * </tr>
 * <tr>
 *   <td>Decode until the next {@code \r\n}, {@code \n}, or {@code EOF}</td>
 *   <td></td>
 *   <td>{@link BufferedSource#readUtf8Line()}</td>
 * </tr>
 * <tr>
 *   <td>Measure the bytes in a UTF-8 string</td>
 *   <td colspan="2">{@link Utf8#size}, {@link Utf8#size(String, int, int)}</td>
 * </tr>
 * </table>
 */
public final class Utf8 {
  /** Not instantiable: this class only exposes static helpers. */
  private Utf8() {
  }

  /**
   * Returns the number of bytes used to encode {@code string} as UTF-8 when using
   * {@link ByteString#encodeUtf8} or {@link Buffer#writeUtf8(String)}.
   *
   * <p>Equivalent to {@code size(string, 0, string.length())}.
   *
   * @param string the string to measure
   * @return the encoded length in bytes
   * @throws NullPointerException if {@code string} is null (its length is read before delegating)
   */
  public static long size(String string) {
    return size(string, 0, string.length());
  }

  /**
   * Returns the number of bytes used to encode the slice of {@code string} as UTF-8 when using
   * {@link BufferedSink#writeUtf8(String, int, int)}.
   *
   * <p>A surrogate that is not part of a well-formed high/low pair inside the slice is counted as
   * a single byte (the {@code '?'} replacement). A pair that straddles {@code endIndex} is
   * therefore treated as malformed.
   *
   * @param string the string to measure
   * @param beginIndex index of the first {@code char} to measure, inclusive
   * @param endIndex index after the last {@code char} to measure, exclusive
   * @return the encoded length in bytes of {@code string[beginIndex, endIndex)}
   * @throws IllegalArgumentException if {@code string} is null, {@code beginIndex} is negative,
   *     {@code endIndex} is less than {@code beginIndex}, or {@code endIndex} exceeds
   *     {@code string.length()}
   */
  public static long size(String string, int beginIndex, int endIndex) {
    if (string == null) {
      throw new IllegalArgumentException("string == null");
    }
    if (beginIndex < 0) {
      throw new IllegalArgumentException("beginIndex < 0: " + beginIndex);
    }
    if (endIndex < beginIndex) {
      throw new IllegalArgumentException("endIndex < beginIndex: " + endIndex + " < " + beginIndex);
    }
    if (endIndex > string.length()) {
      throw new IllegalArgumentException(
          "endIndex > string.length: " + endIndex + " > " + string.length());
    }

    long result = 0;
    for (int i = beginIndex; i < endIndex;) {
      int c = string.charAt(i);

      if (c < 0x80) {
        // A 7-bit character with 1 byte.
        result++;
        i++;

      } else if (c < 0x800) {
        // An 11-bit character with 2 bytes.
        result += 2;
        i++;

      } else if (c < 0xd800 || c > 0xdfff) {
        // A 16-bit character with 3 bytes.
        result += 3;
        i++;

      } else {
        // c is a surrogate. Peek at the next char, but never past endIndex; 0 is used as a
        // sentinel that can never be mistaken for a low surrogate.
        int low = i + 1 < endIndex ? string.charAt(i + 1) : 0;
        if (c > 0xdbff || low < 0xdc00 || low > 0xdfff) {
          // A malformed surrogate, which yields '?'.
          result++;
          i++;

        } else {
          // A 21-bit character with 4 bytes.
          result += 4;
          i += 2;
        }
      }
    }

    return result;
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy