All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.abdera.i18n.text.Codepoint Maven / Gradle / Ivy

/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements.  The ASF licenses this file to You
* under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License.
* You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.  For additional information regarding
* copyright in this work, please see the NOTICE file in the top level
* directory of this distribution.
*/
package org.apache.abdera.i18n.text;

import java.io.Serializable;
import java.io.UnsupportedEncodingException;

/**
 * Represents a single Unicode Codepoint.
 *
 * Instances are immutable: the wrapped integer value is assigned once in the
 * constructor and never changes. Ordering, equality and hashing are all
 * based solely on that integer value.
 */
public class Codepoint
  implements Serializable, 
             Cloneable,
             Comparable<Codepoint> {

  private static final long serialVersionUID = 140337939131905483L;
  
  private static final String DEFAULT_ENCODING = "UTF-8";

  /** Largest value that fits in a single Java char. */
  private static final int MAX_CHAR_VALUE = 0xFFFF;

  private final int value;

  /**
   * Create a Codepoint from a byte array using the default encoding (UTF-8).
   * The bytes must decode to exactly one char, or to two chars that form a
   * surrogate pair.
   *
   * @param bytes the encoded form of a single codepoint
   * @throws IllegalArgumentException if the bytes decode to zero chars or to
   *         more than two chars
   */
  public Codepoint(byte[] bytes) {
    try {
      this.value = valueFromCharSequence(new String(bytes,DEFAULT_ENCODING));
    } catch (UnsupportedEncodingException e) {
      throw new RuntimeException(e);
    }
  }
  
  /**
   * Create a Codepoint from a byte array with the specified charset encoding.
   * The bytes must decode to exactly one char, or to two chars that form a
   * surrogate pair.
   *
   * @param bytes the encoded form of a single codepoint
   * @param encoding the name of the charset used to decode the bytes
   * @throws UnsupportedEncodingException if the named charset is not supported
   * @throws IllegalArgumentException if the bytes decode to zero chars or to
   *         more than two chars
   */
  public Codepoint(
    byte[] bytes, 
    String encoding) 
      throws UnsupportedEncodingException {
    this.value = valueFromCharSequence(new String(bytes,encoding));
  }
  
  /**
   * Create a Codepoint from a CharSequence. The length must be 1 (a single
   * char) or 2 (a surrogate pair).
   *
   * @param value the chars making up a single codepoint
   * @throws IllegalArgumentException if the sequence is empty or longer than
   *         two chars
   */
  public Codepoint(CharSequence value) {
    this(valueFromCharSequence(value));
  }
  
  /**
   * Extract the codepoint value from a sequence of one or two chars.
   * A single char is used as-is; two chars are combined with
   * CharUtils.toSupplementary as a (high, low) pair.
   */
  private static int valueFromCharSequence(CharSequence s) {
    final int length = s.length();
    // An empty sequence used to fall through to the surrogate-pair branch
    // and fail with a StringIndexOutOfBoundsException; reject it explicitly.
    if (length == 0) {
      throw new IllegalArgumentException("Not enough chars");
    }
    if (length > 2) {
      throw new IllegalArgumentException("Too many chars");
    }
    if (length == 1) {
      return (int)s.charAt(0);
    }
    char high = s.charAt(0);
    char low = s.charAt(1);
    return CharUtils.toSupplementary(high, low).getValue();
  }
  
  /**
   * Create a codepoint from a single char
   */
  public Codepoint(char value) {
    this((int)value);
  }

  /**
   * Create a codepoint from a surrogate pair
   *
   * @param high the high (leading) surrogate
   * @param low the low (trailing) surrogate
   */
  public Codepoint(char high, char low) {
    this(CharUtils.toSupplementary(high, low).getValue());
  }
  
  /**
   * Create a codepoint as a copy of another codepoint
   */
  public Codepoint(Codepoint codepoint) {
    this(codepoint.value);
  }
  
  /**
   * Create a codepoint from a specific integer value
   *
   * @param value the codepoint value; must not be negative
   * @throws IllegalArgumentException if the value is negative
   */
  public Codepoint(int value) {
    if (value < 0) 
      throw new IllegalArgumentException(
        "Invalid Codepoint");
    this.value = value;
  }
  
  /**
   * The codepoint value
   */
  public int getValue() {
    return value;
  }
  
  /**
   * True if this codepoint is supplementary
   */
  public boolean isSupplementary() {
    return CharUtils.isSupplementary(value);
  }

  /**
   * True if this codepoint is a low surrogate
   */
  public boolean isLowSurrogate() {
    // Only values that fit in a char can be surrogates. Without the range
    // check the narrowing cast would drop the high bits and a supplementary
    // codepoint such as 0x1DC00 would be misreported as a surrogate.
    return value <= MAX_CHAR_VALUE && CharUtils.isLowSurrogate((char)value);
  }
  
  /**
   * True if this codepoint is a high surrogate
   */
  public boolean isHighSurrogate() {
    // See isLowSurrogate() for why the range check precedes the cast.
    return value <= MAX_CHAR_VALUE && CharUtils.isHighSurrogate((char)value);
  }
  
  /**
   * Get the high surrogate of this Codepoint
   */
  public char getHighSurrogate() {
    return CharUtils.getHighSurrogate(value);
  }
  
  /**
   * Get the low surrogate of this Codepoint
   */
  public char getLowSurrogate() {
    return CharUtils.getLowSurrogate(value);
  }
  
  /**
   * True if this Codepoint is a bidi control char
   */
  public boolean isBidi() {
    return CharUtils.isBidi(value);
  }
  
  /**
   * True if CharUtils classifies this codepoint as a digit
   */
  public boolean isDigit() {
    return CharUtils.isDigit(value);
  }

  /**
   * True if CharUtils classifies this codepoint as alphabetic
   */
  public boolean isAlpha() {
    return CharUtils.isAlpha(value);
  }
  
  /**
   * True if this codepoint is either alphabetic or a digit
   */
  public boolean isAlphaDigit() {
    // Previously only tested isAlpha, so digits were wrongly rejected.
    return CharUtils.isAlpha(value) || CharUtils.isDigit(value);
  }
  
  /**
   * Order codepoints by their numeric value
   *
   * @return -1, 0 or 1 if this value is less than, equal to or greater than
   *         the other codepoint's value
   */
  public int compareTo(Codepoint o) {
    return value < o.value ? -1 :
           value == o.value ? 0 : 1;
  }
  
  /**
   * The String form of this codepoint, as produced by CharUtils.toString
   */
  @Override 
  public String toString() {
    return CharUtils.toString(value);
  }
  
  /**
   * The chars of this codepoint's String form
   */
  public char[] toChars() {
    return toString().toCharArray();
  }
  
  /**
   * Get the number of chars necessary to represent this codepoint.
   * Returns 2 if this is a supplementary codepoint
   */
  public int getCharCount() {
    return toChars().length;
  }
  
  /**
   * Encode this codepoint using the default encoding (UTF-8)
   */
  public byte[] toBytes() {
    try {
      return toBytes(DEFAULT_ENCODING);
    } catch (UnsupportedEncodingException e) {
      throw new RuntimeException(e);
    }
  }
  
  /**
   * Encode this codepoint using the specified charset encoding
   *
   * @param encoding the name of the charset used to encode the chars
   * @throws UnsupportedEncodingException if the named charset is not supported
   */
  public byte[] toBytes(
    String encoding) 
      throws UnsupportedEncodingException {
    return toString().getBytes(encoding);
  }

  @Override 
  public int hashCode() {
    final int prime = 31;
    int result = 1;
    result = prime * result + value;
    return result;
  }

  @Override 
  public boolean equals(Object obj) {
    if (this == obj) return true;
    if (obj == null) return false;
    if (getClass() != obj.getClass()) return false;
    final Codepoint other = (Codepoint) obj;
    return value == other.value;
  }
   
  /**
   * Get the index of the Unicode plane containing this codepoint, i.e. the
   * value divided by 0x10000.
   *
   * Plane 0 (0000-FFFF): Basic Multilingual Plane (BMP). This is the plane containing most of the character assignments so far. A primary objective for the BMP is to support the unification of prior character sets as well as characters for writing systems in current use.
   * Plane 1 (10000-1FFFF): Supplementary Multilingual Plane (SMP).
   * Plane 2 (20000-2FFFF): Supplementary Ideographic Plane (SIP)
   * Planes 3 to 13 (30000-DFFFF) are unassigned
   * Plane 14 (E0000-EFFFF): Supplementary Special-purpose Plane (SSP)
   * Plane 15 (F0000-FFFFF) reserved for the Private Use Area (PUA)
   * Plane 16 (100000-10FFFF), reserved for the Private Use Area (PUA)
   **/
  public int getPlane() {
    return value / (0xFFFF + 1);
  }
  
  /**
   * Create a copy of this codepoint
   */
  @Override 
  public Codepoint clone() {
    try {
      return (Codepoint) super.clone();
    } catch (CloneNotSupportedException e) {
      // Fall back to building an equivalent instance by hand.
      return new Codepoint(value);
    }
  }
  
  /**
   * Get the next codepoint
   *
   * @throws IndexOutOfBoundsException if this is the last codepoint (0x10FFFF)
   */
  public Codepoint next() {
    if (value == 0x10ffff) throw new IndexOutOfBoundsException();
    return new Codepoint(value + 1);
  }
  
  /**
   * Get the previous codepoint
   *
   * @throws IndexOutOfBoundsException if this is the first codepoint (0)
   */
  public Codepoint previous() {
    if (value == 0) throw new IndexOutOfBoundsException();
    return new Codepoint(value - 1);
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy