edu.isi.nlp.StringWithoutNonBmp Maven / Gradle / Ivy
The newest version!
package edu.isi.nlp;
import com.google.common.base.Optional;
import edu.isi.nlp.strings.offsets.CharOffset;
import edu.isi.nlp.strings.offsets.OffsetRange;
import edu.isi.nlp.strings.offsets.UTF16Offset;
import org.immutables.value.Value;
/**
 * A {@link UnicodeFriendlyString} which does not contain a non-BMP character. This class should
 * never be referenced directly. Always create {@link UnicodeFriendlyString}s via {@link
 * StringUtils#unicodeFriendly(String)}. See the interface Javadoc for details.
 *
 * <p>Because every character lies in the Basic Multilingual Plane, each code point occupies
 * exactly one UTF-16 code unit. The methods below rely on that: code point offsets and UTF-16
 * code unit offsets are used interchangeably when indexing into {@link #utf16CodeUnits()}.
 */
@IsiNlpImmutable
@Value.Immutable
abstract class StringWithoutNonBmp extends AbstractUnicodeFriendlyString
    implements UnicodeFriendlyString {

  /** Returns the underlying Java string, i.e. the UTF-16 code units of this string. */
  @Override
  public abstract String utf16CodeUnits();

  /** Always returns {@code false}: this class only represents BMP-only strings. */
  @Override
  public final boolean hasNonBmpCharacter() {
    return false;
  }

  /**
   * Always returns {@code false}, whatever range is requested; the argument is not inspected.
   *
   * @param characterRange the range to check (ignored)
   */
  // NOTE(review): OffsetRange is used as a raw type here; its type argument may have been lost.
  // Confirm against the interface declaration before parameterizing it.
  @Override
  public boolean hasNonBmpCharacter(OffsetRange characterRange) {
    return false;
  }

  /**
   * Wraps the given string using this class's {@link Builder}.
   *
   * <p>No BMP-only validation is performed in this method; per the class Javadoc, callers are
   * expected to go through {@link StringUtils#unicodeFriendly(String)} instead.
   *
   * @param utf16CodeUnits the string content to wrap
   * @return a new instance holding {@code utf16CodeUnits}
   */
  public static UnicodeFriendlyString of(String utf16CodeUnits) {
    return new StringWithoutNonBmp.Builder().utf16CodeUnits(utf16CodeUnits).build();
  }

  /** Returns the length of {@link #utf16CodeUnits()}. */
  @Override
  public int lengthInUtf16CodeUnits() {
    return utf16CodeUnits().length();
  }

  /** Returns the same value as {@link #lengthInUtf16CodeUnits()}: one code unit per code point. */
  @Override
  public int lengthInCodePoints() {
    return lengthInUtf16CodeUnits();
  }

  /**
   * Returns the code point at the given code point offset.
   *
   * @param codepointIdx code point offset into this string
   * @return the code point found at that offset
   */
  @Override
  public int codepointAtCodepointIndex(final CharOffset codepointIdx) {
    // for this class codepoint indices are guaranteed to equal code unit indices
    return utf16CodeUnits().codePointAt(codepointIdx.asInt());
  }

  /**
   * Converts a UTF-16 code unit offset into a code point offset with the same numeric value.
   *
   * @param offset UTF-16 code unit offset
   * @return the corresponding code point offset
   */
  @Override
  public CharOffset codepointIndex(UTF16Offset offset) {
    // if all characters are in the BMP then there is a 1-1 code unit to code point mapping
    return CharOffset.asCharOffset(offset.asInt());
  }

  /**
   * Returns the suffix of this string starting at the given code point offset.
   *
   * @param startCodepointInclusive offset of the first code point to keep
   * @return a new string holding the suffix
   */
  @Override
  public UnicodeFriendlyString substringByCodePoints(final CharOffset startCodepointInclusive) {
    return StringWithoutNonBmp.of(utf16CodeUnits().substring(startCodepointInclusive.asInt()));
  }

  /**
   * Returns the part of this string between two code point offsets.
   *
   * @param startCodepointInclusive offset of the first code point to keep
   * @param endCodepointExclusive offset just past the last code point to keep
   * @return a new string holding the requested span
   */
  @Override
  public UnicodeFriendlyString substringByCodePoints(
      final CharOffset startCodepointInclusive, final CharOffset endCodepointExclusive) {
    return StringWithoutNonBmp.of(
        utf16CodeUnits().substring(startCodepointInclusive.asInt(), endCodepointExclusive.asInt()));
  }

  /** Returns whether {@link #utf16CodeUnits()} is the empty string. */
  @Override
  public boolean isEmpty() {
    return utf16CodeUnits().isEmpty();
  }

  /**
   * Returns a copy of this string trimmed with {@link String#trim()}, i.e. with leading and
   * trailing characters at or below U+0020 removed.
   */
  @Override
  public UnicodeFriendlyString trim() {
    return StringWithoutNonBmp.of(utf16CodeUnits().trim());
  }

  /**
   * Finds the first occurrence of {@code other} in this string at or after {@code startIndex}.
   *
   * @param other the string to search for
   * @param startIndex code point offset at which the search begins; may equal the length of this
   *     string
   * @return the code point offset of the first match, or absent if there is none
   * @throws IndexOutOfBoundsException if {@code startIndex} is negative or greater than {@link
   *     #lengthInCodePoints()}
   */
  @Override
  public final Optional<CharOffset> codePointIndexOf(
      UnicodeFriendlyString other, CharOffset startIndex) {
    final int start = startIndex.asInt();
    if (start < 0 || start > lengthInCodePoints()) {
      throw new IndexOutOfBoundsException("StartIndex was out of bounds: " + startIndex);
    }
    // code unit offsets returned by String.indexOf are code point offsets for this class
    final int offset = utf16CodeUnits().indexOf(other.utf16CodeUnits(), start);
    if (offset >= 0) {
      return Optional.of(CharOffset.asCharOffset(offset));
    }
    return Optional.absent();
  }

  /**
   * Feeds every code point of this string, in order, to the given processor together with this
   * string and the code point's offset.
   *
   * @param codePointProcessor the callback invoked once per code point
   */
  @Override
  public final void processCodePoints(CodePointProcessor codePointProcessor) {
    final String codeUnits = utf16CodeUnits();
    final int length = codeUnits.length();
    for (int i = 0; i < length; ++i) {
      codePointProcessor.processCodepoint(this, CharOffset.asCharOffset(i), codeUnits.codePointAt(i));
    }
  }

  // The three overrides below are final and simply defer to the superclass implementation.
  // Presumably this also keeps the Immutables-generated subclass from supplying its own
  // versions; confirm against the Immutables documentation.

  /** Delegates to the superclass implementation. */
  @Override
  public final String toString() {
    return super.toString();
  }

  /** Delegates to the superclass implementation. */
  @Override
  public final int hashCode() {
    return super.hashCode();
  }

  /** Delegates to the superclass implementation. */
  @Override
  public final boolean equals(Object o) {
    return super.equals(o);
  }

  /** Builder for this class; extends the generated {@code ImmutableStringWithoutNonBmp.Builder}. */
  public static class Builder extends ImmutableStringWithoutNonBmp.Builder {}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy