org.apache.avro.util.Utf8 Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of spark-core Show documentation
Show all versions of spark-core Show documentation
Shaded version of Apache Spark 2.x.x for Presto
The newest version!
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.avro.util;
import java.nio.charset.Charset;
import java.io.UnsupportedEncodingException;
import org.apache.avro.io.BinaryData;
/** A Utf8 string. Unlike {@link String}, instances are mutable. This is more
* efficient than {@link String} when reading or writing a sequence of values,
* as a single instance may be reused. */
public class Utf8 implements Comparable, CharSequence {
private static final byte[] EMPTY = new byte[0];
private static final Charset UTF8 = Charset.forName("UTF-8");
private byte[] bytes = EMPTY;
private int length;
private String string;
public Utf8() {}
public Utf8(String string) {
this.bytes = getBytesFor(string);
this.length = bytes.length;
this.string = string;
}
public Utf8(Utf8 other) {
this.length = other.length;
this.bytes = new byte[other.length];
System.arraycopy(other.bytes, 0, this.bytes, 0, this.length);
this.string = other.string;
}
public Utf8(byte[] bytes) {
this.bytes = bytes;
this.length = bytes.length;
}
/** Return UTF-8 encoded bytes.
* Only valid through {@link #getByteLength()}. */
public byte[] getBytes() { return bytes; }
/** Return length in bytes.
* @deprecated call {@link #getByteLength()} instead. */
public int getLength() { return length; }
/** Return length in bytes. */
public int getByteLength() { return length; }
/** Set length in bytes. Should called whenever byte content changes, even
* if the length does not change, as this also clears the cached String.
* @deprecated call {@link #setByteLength(int)} instead. */
public Utf8 setLength(int newLength) {
return setByteLength(newLength);
}
/** Set length in bytes. Should called whenever byte content changes, even
* if the length does not change, as this also clears the cached String. */
public Utf8 setByteLength(int newLength) {
if (this.bytes.length < newLength) {
byte[] newBytes = new byte[newLength];
System.arraycopy(bytes, 0, newBytes, 0, this.length);
this.bytes = newBytes;
}
this.length = newLength;
this.string = null;
return this;
}
/** Set to the contents of a String. */
public Utf8 set(String string) {
this.bytes = getBytesFor(string);
this.length = bytes.length;
this.string = string;
return this;
}
private abstract static class Utf8Converter {
public abstract String fromUtf8(byte[] bytes, int length);
public abstract byte[] toUtf8(String str);
}
private static final Utf8Converter UTF8_CONVERTER =
System.getProperty("java.version").startsWith("1.6.")
? new Utf8Converter() { // optimized for Java 6
public String fromUtf8(byte[] bytes, int length) {
try {
return new String(bytes, 0, length, "UTF-8");
} catch (UnsupportedEncodingException e) {
throw new RuntimeException(e);
}
}
public byte[] toUtf8(String str) {
try {
return str.getBytes("UTF-8");
} catch (UnsupportedEncodingException e) {
throw new RuntimeException(e);
}
}
}
: new Utf8Converter() { // faster in Java 7 & 8
public String fromUtf8(byte[] bytes, int length) {
return new String(bytes, 0, length, UTF8);
}
public byte[] toUtf8(String str) {
return str.getBytes(UTF8);
}
};
@Override
public String toString() {
if (this.length == 0) return "";
if (this.string == null) {
this.string = UTF8_CONVERTER.fromUtf8(bytes, length);
}
return this.string;
}
@Override
public boolean equals(Object o) {
if (o == this) return true;
if (!(o instanceof Utf8)) return false;
Utf8 that = (Utf8)o;
if (!(this.length == that.length)) return false;
byte[] thatBytes = that.bytes;
for (int i = 0; i < this.length; i++)
if (bytes[i] != thatBytes[i])
return false;
return true;
}
@Override
public int hashCode() {
int hash = 0;
for (int i = 0; i < this.length; i++)
hash = hash*31 + bytes[i];
return hash;
}
@Override
public int compareTo(Utf8 that) {
return BinaryData.compareBytes(this.bytes, 0, this.length,
that.bytes, 0, that.length);
}
// CharSequence implementation
@Override public char charAt(int index) { return toString().charAt(index); }
@Override public int length() { return toString().length(); }
@Override public CharSequence subSequence(int start, int end) {
return toString().subSequence(start, end);
}
/** Gets the UTF-8 bytes for a String */
public static final byte[] getBytesFor(String str) {
return UTF8_CONVERTER.toUtf8(str);
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy