All downloads are free. Search and download functionality uses the official Maven repository.

com.ibm.icu.impl.StringSegment Maven / Gradle / Ivy

There is a newer version: 2.12.15
Show newest version
// © 2017 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
package com.ibm.icu.impl;

import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.text.UnicodeSet;

/**
 * A mutable String wrapper with a variable offset and length and support for case folding.
 * 

* The charAt, length, and subSequence methods all operate relative to the fixed offset into the String. *

* CAUTION: Since this class is mutable, it must not be used anywhere that an immutable object is * required, like in a cache or as the key of a hash map. * * @author sffc */ public class StringSegment implements CharSequence { private final String str; private int start; private int end; private boolean foldCase; public StringSegment(String str, boolean foldCase) { this.str = str; this.start = 0; this.end = str.length(); this.foldCase = foldCase; } public int getOffset() { return start; } public void setOffset(int start) { assert start <= end; this.start = start; } /** * Equivalent to setOffset(getOffset()+delta). * *

* Number parsing note: This method is usually called by a Matcher to register that a char was * consumed. If the char is strong (it usually is, except for things like whitespace), follow this * with a call to ParsedNumber#setCharsConsumed(). For more information on strong chars, see that * method. */ public void adjustOffset(int delta) { assert start + delta >= 0; assert start + delta <= end; start += delta; } /** * Adjusts the offset by the width of the current lead code point, either 1 or 2 chars. */ public void adjustOffsetByCodePoint() { start += Character.charCount(getCodePoint()); } public void setLength(int length) { assert length >= 0; assert start + length <= str.length(); end = start + length; } public void resetLength() { end = str.length(); } @Override public int length() { return end - start; } @Override public char charAt(int index) { return str.charAt(index + start); } @Override public CharSequence subSequence(int start, int end) { throw new AssertionError(); // Never used // Possible implementation: // return str.subSequence(start + this.start, end + this.start); } /** * Returns the first code point in the string segment. * *

* Important: Most of the time, you should use {@link #startsWith}, which handles * case folding logic, instead of this method. */ public int getCodePoint() { assert start < end; char lead = str.charAt(start); char trail; if (Character.isHighSurrogate(lead) && start + 1 < end && Character.isLowSurrogate(trail = str.charAt(start + 1))) { return Character.toCodePoint(lead, trail); } return lead; } /** * Returns true if the first code point of this StringSegment equals the given code point. * *

* This method will perform case folding if case folding is enabled for the parser. */ public boolean startsWith(int otherCp) { return codePointsEqual(getCodePoint(), otherCp, foldCase); } /** * Returns true if the first code point of this StringSegment is in the given UnicodeSet. */ public boolean startsWith(UnicodeSet uniset) { // TODO: Move UnicodeSet case-folding logic here. // TODO: Handle string matches here instead of separately. int cp = getCodePoint(); if (cp == -1) { return false; } return uniset.contains(cp); } /** * Returns the length of the prefix shared by this StringSegment and the given CharSequence. For * example, if this string segment is "aab", and the char sequence is "aac", this method returns 2, * since the first 2 characters are the same. * *

* This method only returns offsets along code point boundaries. * *

* This method will perform case folding if case folding was enabled in the constructor. */ public int getCommonPrefixLength(CharSequence other) { return getPrefixLengthInternal(other, foldCase); } /** * Like {@link #getCommonPrefixLength}, but never performs case folding, even if case folding was * enabled in the constructor. */ public int getCaseSensitivePrefixLength(CharSequence other) { return getPrefixLengthInternal(other, false); } private int getPrefixLengthInternal(CharSequence other, boolean foldCase) { int offset = 0; for (; offset < Math.min(length(), other.length());) { int cp1 = Character.codePointAt(this, offset); int cp2 = Character.codePointAt(other, offset); if (!codePointsEqual(cp1, cp2, foldCase)) { break; } offset += Character.charCount(cp1); } return offset; } private static final boolean codePointsEqual(int cp1, int cp2, boolean foldCase) { if (cp1 == cp2) { return true; } if (!foldCase) { return false; } cp1 = UCharacter.foldCase(cp1, true); cp2 = UCharacter.foldCase(cp2, true); return cp1 == cp2; } /** * Equals any CharSequence with the same chars as this segment. * *

* This method does not perform case folding; if you want case-insensitive equality, use * {@link #getCommonPrefixLength}. */ @Override public boolean equals(Object other) { if (!(other instanceof CharSequence)) return false; return Utility.charSequenceEquals(this, (CharSequence) other); } /** Returns a hash code equivalent to calling .toString().hashCode() */ @Override public int hashCode() { return Utility.charSequenceHashCode(this); } @Override public String toString() { return str.substring(0, start) + "[" + str.substring(start, end) + "]" + str.substring(end); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy