All downloads are free. Search and download functionality uses the official Maven repository.

edu.isi.nlp.AbstractUnicodeFriendlyString Maven / Gradle / Ivy

The newest version!
package edu.isi.nlp;

import com.google.common.base.Optional;
import edu.isi.nlp.strings.offsets.CharOffset;
import edu.isi.nlp.strings.offsets.OffsetRange;

/**
 * Ensures certain common behaviors for {@link UnicodeFriendlyString} implementations, especially
 * that equality and hashcode are done by the UTF-16 code units (that is, the underlying Java {@code
 * String}).
 *
 * <p>Every method here is expressed in terms of {@code utf16CodeUnits()} or of an overload taking
 * more arguments, both of which are left to the concrete subclass.
 */
abstract class AbstractUnicodeFriendlyString implements UnicodeFriendlyString {

  /**
   * Takes a substring using a range whose end point is inclusive.
   *
   * <p>Delegates to the two-argument {@code substringByCodePoints}, shifting the inclusive end
   * point forward by one to obtain the exclusive end point that overload expects.
   *
   * @param codePointRange the range to extract; both of its end points are read
   * @return whatever the two-argument overload returns for the converted end points
   */
  // NOTE(review): OffsetRange appears here without a type argument; if it is declared generic
  // (e.g. OffsetRange<CharOffset>), the argument was probably lost in extraction - confirm
  // against the interface declaration before parameterizing.
  @Override
  public final UnicodeFriendlyString substringByCodePoints(OffsetRange codePointRange) {
    return substringByCodePoints(
        codePointRange.startInclusive(),
        // version with separate arguments has exclusive endpoint
        codePointRange.endInclusive().shiftedCopy(1));
  }

  /**
   * Tests whether the UTF-16 code units of this string contain the given {@code String}.
   *
   * @param otherCodeUnits the sequence to look for
   * @return the result of {@link String#contains(CharSequence)} on {@code utf16CodeUnits()}
   */
  @Override
  public final boolean contains(String otherCodeUnits) {
    return utf16CodeUnits().contains(otherCodeUnits);
  }

  /**
   * Tests whether the UTF-16 code units of this string contain those of {@code other}.
   *
   * @param other the string whose code units are searched for
   * @return the result of {@link String#contains(CharSequence)} on the two code unit strings
   */
  @Override
  public final boolean contains(UnicodeFriendlyString other) {
    return utf16CodeUnits().contains(other.utf16CodeUnits());
  }

  /**
   * Tests whether this string starts with {@code ufs}; equivalent to calling {@link
   * #startsWith(UnicodeFriendlyString, CharOffset)} with an offset of zero.
   *
   * @param ufs the candidate prefix
   * @return {@code true} if the offset-taking overload reports a match at offset zero
   */
  @Override
  public final boolean startsWith(final UnicodeFriendlyString ufs) {
    return startsWith(ufs, CharOffset.asCharOffset(0));
  }

  /**
   * Tests for {@code ufs} by searching with {@code codePointIndexOf(ufs, offset)} and checking
   * that a match was found whose reported index is zero.
   *
   * <p>Not {@code final}, so subclasses may substitute a faster implementation.
   *
   * @param ufs the candidate prefix
   * @param offset the offset handed to {@code codePointIndexOf} as the search start
   * @return {@code true} only if the search finds a match and its index equals zero
   */
  @Override
  public boolean startsWith(UnicodeFriendlyString ufs, CharOffset offset) {
    // simple but slow implementation
    // The explicit type argument is required: with a raw Optional, get() is typed as Object and
    // the asInt() call below does not compile.
    final Optional<CharOffset> ret = codePointIndexOf(ufs, offset);
    // NOTE(review): the found index is compared with 0 rather than with `offset`. If
    // codePointIndexOf reports positions relative to the start of the whole string, this can
    // only succeed when offset is 0 - confirm the intended semantics before changing.
    return ret.isPresent() && ret.get().asInt() == 0;
  }

  /**
   * Hashes by the UTF-16 code units, keeping the hash consistent with {@link #equals(Object)}.
   *
   * @return the hash code of {@code utf16CodeUnits()}
   */
  @Override
  public int hashCode() {
    return utf16CodeUnits().hashCode();
  }

  /**
   * Compares by UTF-16 code units. Any {@link UnicodeFriendlyString}, regardless of its concrete
   * class, is equal to this one when the two code unit strings are equal.
   *
   * @param other the object to compare with; {@code null} and non-{@code UnicodeFriendlyString}
   *     objects are never equal
   * @return {@code true} if {@code other} is this object or has equal UTF-16 code units
   */
  @Override
  public boolean equals(Object other) {
    if (this == other) {
      return true;
    }
    return other instanceof UnicodeFriendlyString
        && utf16CodeUnits().equals(((UnicodeFriendlyString) other).utf16CodeUnits());
  }

  /**
   * Returns the string's UTF-16 code units.
   *
   * @return the value of {@code utf16CodeUnits()}
   */
  @Override
  public String toString() {
    return utf16CodeUnits();
  }

  /**
   * Searches for {@code other} starting from offset zero; equivalent to calling the two-argument
   * {@code codePointIndexOf} with {@code CharOffset.asCharOffset(0)}.
   *
   * @param other the string to search for
   * @return whatever the two-argument overload returns for a search starting at offset zero
   */
  @Override
  public final Optional<CharOffset> codePointIndexOf(UnicodeFriendlyString other) {
    return codePointIndexOf(other, CharOffset.asCharOffset(0));
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy