// org.apache.abdera.i18n.text.Codepoint (Maven / Gradle / Ivy)
// The newest version!
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. The ASF licenses this file to You
* under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License. For additional information regarding
* copyright in this work, please see the NOTICE file in the top level
* directory of this distribution.
*/
package org.apache.abdera.i18n.text;
import java.io.Serializable;
import java.io.UnsupportedEncodingException;
/**
 * Represents a single Unicode codepoint as an immutable integer value.
 * <p>
 * Instances are ordered and compared by their numeric codepoint value. Character classification and surrogate
 * arithmetic are delegated to {@link CharUtils}.
 */
public class Codepoint implements Serializable, Cloneable, Comparable<Codepoint> {

    private static final long serialVersionUID = 140337939131905483L;

    private static final String DEFAULT_ENCODING = "UTF-8";

    /** Largest value that fits in a single Java {@code char}. */
    private static final int MAX_BMP = 0xFFFF;

    /** Upper bound used by {@link #next()}. */
    private static final int MAX_CODEPOINT = 0x10FFFF;

    private final int value;

    /**
     * Create a Codepoint from a byte array using the default encoding (UTF-8).
     *
     * @param bytes the encoded form of exactly one codepoint
     * @throws IllegalArgumentException if the decoded text is empty or longer than two chars
     */
    public Codepoint(byte[] bytes) {
        try {
            this.value = valueFromCharSequence(new String(bytes, DEFAULT_ENCODING));
        } catch (UnsupportedEncodingException e) {
            // DEFAULT_ENCODING is a fixed name; surface a failure as unchecked, keeping the cause.
            throw new RuntimeException(e);
        }
    }

    /**
     * Create a Codepoint from a byte array with the specified charset encoding. The decoded text must be either a
     * single char or a two-char surrogate pair.
     *
     * @param bytes the encoded form of exactly one codepoint
     * @param encoding the name of the charset used to decode {@code bytes}
     * @throws UnsupportedEncodingException if the named charset is not supported
     * @throws IllegalArgumentException if the decoded text is empty or longer than two chars
     */
    public Codepoint(byte[] bytes, String encoding) throws UnsupportedEncodingException {
        this.value = valueFromCharSequence(new String(bytes, encoding));
    }

    /**
     * Create a Codepoint from a CharSequence. The sequence must be either a single char or a two-char surrogate
     * pair.
     *
     * @param value the chars making up exactly one codepoint
     * @throws IllegalArgumentException if the sequence is empty or longer than two chars
     */
    public Codepoint(CharSequence value) {
        this(valueFromCharSequence(value));
    }

    /**
     * Extract the codepoint value held by a one- or two-char sequence.
     */
    private static int valueFromCharSequence(CharSequence s) {
        switch (s.length()) {
            case 0:
                // Reject explicitly instead of failing with an index error from charAt(0).
                throw new IllegalArgumentException("Empty char sequence");
            case 1:
                return s.charAt(0);
            case 2:
                return CharUtils.toSupplementary(s.charAt(0), s.charAt(1)).getValue();
            default:
                throw new IllegalArgumentException("Too many chars");
        }
    }

    /**
     * Create a codepoint from a single char
     */
    public Codepoint(char value) {
        this((int) value);
    }

    /**
     * Create a codepoint from a surrogate pair
     *
     * @param high the high (leading) surrogate
     * @param low the low (trailing) surrogate
     */
    public Codepoint(char high, char low) {
        this(CharUtils.toSupplementary(high, low).getValue());
    }

    /**
     * Create a codepoint as a copy of another codepoint
     */
    public Codepoint(Codepoint codepoint) {
        this(codepoint.value);
    }

    /**
     * Create a codepoint from a specific integer value
     *
     * @throws IllegalArgumentException if {@code value} is negative
     */
    public Codepoint(int value) {
        if (value < 0) {
            throw new IllegalArgumentException("Invalid Codepoint");
        }
        this.value = value;
    }

    /**
     * The codepoint value
     */
    public int getValue() {
        return value;
    }

    /**
     * True if this codepoint is supplementary
     */
    public boolean isSupplementary() {
        return CharUtils.isSupplementary(value);
    }

    /**
     * True if this codepoint is a low surrogate
     */
    public boolean isLowSurrogate() {
        // Only values that fit in a char can be surrogates; without the range check the narrowing cast would
        // truncate a supplementary value (e.g. 0x1DC00 -> 0xDC00) and report a false positive.
        return value <= MAX_BMP && CharUtils.isLowSurrogate((char) value);
    }

    /**
     * True if this codepoint is a high surrogate
     */
    public boolean isHighSurrogate() {
        // Same range check as isLowSurrogate(): avoid truncating supplementary values into the surrogate range.
        return value <= MAX_BMP && CharUtils.isHighSurrogate((char) value);
    }

    /**
     * Get the high surrogate of this Codepoint
     */
    public char getHighSurrogate() {
        return CharUtils.getHighSurrogate(value);
    }

    /**
     * Get the low surrogate of this Codepoint
     */
    public char getLowSurrogate() {
        return CharUtils.getLowSurrogate(value);
    }

    /**
     * True if this Codepoint is a bidi control char
     */
    public boolean isBidi() {
        return CharUtils.isBidi(value);
    }

    /**
     * True if {@link CharUtils#isDigit} classifies this codepoint as a digit
     */
    public boolean isDigit() {
        return CharUtils.isDigit(value);
    }

    /**
     * True if {@link CharUtils#isAlpha} classifies this codepoint as alphabetic
     */
    public boolean isAlpha() {
        return CharUtils.isAlpha(value);
    }

    /**
     * True if this codepoint is either alphabetic or a digit, as classified by {@link CharUtils}
     */
    public boolean isAlphaDigit() {
        return CharUtils.isAlpha(value) || CharUtils.isDigit(value);
    }

    /**
     * Order codepoints by their numeric value.
     *
     * @return -1, 0 or 1 when this value is less than, equal to, or greater than the other value
     */
    @Override
    public int compareTo(Codepoint o) {
        if (value < o.value) {
            return -1;
        }
        return value == o.value ? 0 : 1;
    }

    /**
     * The string form of this codepoint as produced by {@link CharUtils#toString}
     */
    @Override
    public String toString() {
        return CharUtils.toString(value);
    }

    /**
     * The chars of {@link #toString()} as an array
     */
    public char[] toChars() {
        return toString().toCharArray();
    }

    /**
     * Get the number of chars necessary to represent this codepoint. Returns 2 if this is a supplementary codepoint
     */
    public int getCharCount() {
        return toChars().length;
    }

    /**
     * Encode this codepoint using the default encoding (UTF-8)
     */
    public byte[] toBytes() {
        try {
            return toBytes(DEFAULT_ENCODING);
        } catch (UnsupportedEncodingException e) {
            // DEFAULT_ENCODING is a fixed name; surface a failure as unchecked, keeping the cause.
            throw new RuntimeException(e);
        }
    }

    /**
     * Encode this codepoint using the named charset
     *
     * @param encoding the name of the charset to encode with
     * @throws UnsupportedEncodingException if the named charset is not supported
     */
    public byte[] toBytes(String encoding) throws UnsupportedEncodingException {
        return toString().getBytes(encoding);
    }

    @Override
    public int hashCode() {
        final int prime = 31;
        return prime + value;
    }

    /**
     * Two codepoints are equal when they are of the same class and hold the same value.
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null || getClass() != obj.getClass()) {
            return false;
        }
        return value == ((Codepoint) obj).value;
    }

    /**
     * Get the index of the Unicode plane containing this codepoint (the value divided by 0x10000).
     * <ul>
     * <li>Plane 0 (0000–FFFF): Basic Multilingual Plane (BMP). This is the plane containing most of the character
     * assignments so far. A primary objective for the BMP is to support the unification of prior character sets as
     * well as characters for writing systems in current use.</li>
     * <li>Plane 1 (10000–1FFFF): Supplementary Multilingual Plane (SMP).</li>
     * <li>Plane 2 (20000–2FFFF): Supplementary Ideographic Plane (SIP).</li>
     * <li>Planes 3 to 13 (30000–DFFFF) are unassigned.</li>
     * <li>Plane 14 (E0000–EFFFF): Supplementary Special-purpose Plane (SSP).</li>
     * <li>Plane 15 (F0000–FFFFF): reserved for the Private Use Area (PUA).</li>
     * <li>Plane 16 (100000–10FFFF): reserved for the Private Use Area (PUA).</li>
     * </ul>
     */
    public int getPlane() {
        return value / (MAX_BMP + 1);
    }

    /**
     * Create a copy of this codepoint
     */
    @Override
    public Codepoint clone() {
        try {
            return (Codepoint) super.clone();
        } catch (CloneNotSupportedException e) {
            // Fall back to an equivalent new instance if Object.clone() refuses.
            return new Codepoint(value);
        }
    }

    /**
     * Get the next codepoint
     *
     * @throws IndexOutOfBoundsException if this codepoint is 0x10FFFF
     */
    public Codepoint next() {
        if (value == MAX_CODEPOINT) {
            throw new IndexOutOfBoundsException();
        }
        return new Codepoint(value + 1);
    }

    /**
     * Get the previous codepoint
     *
     * @throws IndexOutOfBoundsException if this codepoint is 0
     */
    public Codepoint previous() {
        if (value == 0) {
            throw new IndexOutOfBoundsException();
        }
        return new Codepoint(value - 1);
    }
}
// © 2015 - 2024 Weber Informatics LLC | Privacy Policy