// org.unlaxer.StringIndexAccessor (Maven / Gradle / Ivy)
package org.unlaxer;
/**
 * Index-based access to a character sequence, where every index is expressed
 * in {@code char} values (UTF-16 code units).
 *
 * <p>The {@code int}-based methods are abstract. Where a typed overload exists
 * it is a default method that unwraps its wrapper arguments with
 * {@code value()} (or {@code sourceAsString()} for a {@code CodePointAccessor})
 * and delegates to the {@code int}-based method, wrapping the result where the
 * return type requires it.
 *
 * <p>The documentation of the {@code int}-based methods is adapted from the
 * identically named methods of {@link String}; phrases such as "this string"
 * refer to the character sequence exposed by the implementation.
 */
public interface StringIndexAccessor {

  /**
   * Returns the {@code char} value at the specified index. An index ranges
   * from {@code 0} to {@code length() - 1}. The first {@code char} value of
   * the sequence is at index {@code 0}, the next at index {@code 1}, and so
   * on, as for array indexing.
   *
   * <p>If the {@code char} value specified by the index is a surrogate, the
   * surrogate value is returned.
   *
   * @param index the index of the {@code char} value.
   * @return the {@code char} value at the specified index of this string.
   *     The first {@code char} value is at index {@code 0}.
   * @throws IndexOutOfBoundsException if the {@code index} argument is
   *     negative or not less than the length of this string.
   * @see String#charAt(int)
   */
  char charAt(int index);

  /**
   * Typed overload of {@link #charAt(int)}.
   *
   * @param index the position of the requested {@code char}.
   * @return the result of {@code charAt(index.value())}.
   * @throws NullPointerException if {@code index} is {@code null}.
   */
  default char charAt(StringIndex index) {
    return charAt(index.value());
  }

  /**
   * Returns the character (Unicode code point) at the specified index. The
   * index refers to {@code char} values (Unicode code units) and ranges from
   * {@code 0} to {@code length() - 1}.
   *
   * <p>If the {@code char} value specified at the given index is in the
   * high-surrogate range, the following index is less than the length of this
   * {@code String}, and the {@code char} value at the following index is in
   * the low-surrogate range, then the supplementary code point corresponding
   * to this surrogate pair is returned. Otherwise, the {@code char} value at
   * the given index is returned.
   *
   * @param index the index to the {@code char} values
   * @return the code point value of the character at the {@code index}
   * @throws IndexOutOfBoundsException if the {@code index} argument is
   *     negative or not less than the length of this string.
   * @see String#codePointAt(int)
   */
  int codePointAt(int index);

  /**
   * Returns the character (Unicode code point) before the specified index.
   * The index refers to {@code char} values (Unicode code units) and ranges
   * from {@code 1} to {@link CharSequence#length() length}.
   *
   * <p>If the {@code char} value at {@code (index - 1)} is in the
   * low-surrogate range, {@code (index - 2)} is not negative, and the
   * {@code char} value at {@code (index - 2)} is in the high-surrogate range,
   * then the supplementary code point value of the surrogate pair is
   * returned. If the {@code char} value at {@code index - 1} is an unpaired
   * low-surrogate or a high-surrogate, the surrogate value is returned.
   *
   * @param index the index following the code point that should be returned
   * @return the Unicode code point value before the given index.
   * @throws IndexOutOfBoundsException if the {@code index} argument is less
   *     than 1 or greater than the length of this string.
   * @see String#codePointBefore(int)
   */
  int codePointBefore(int index);

  /**
   * Returns the number of Unicode code points in the specified text range of
   * this {@code String}. The text range begins at the specified
   * {@code beginIndex} and extends to the {@code char} at index
   * {@code endIndex - 1}. Thus the length (in {@code char}s) of the text
   * range is {@code endIndex-beginIndex}. Unpaired surrogates within the text
   * range count as one code point each.
   *
   * @param beginIndex the index to the first {@code char} of the text range.
   * @param endIndex the index after the last {@code char} of the text range.
   * @return the number of Unicode code points in the specified text range
   * @throws IndexOutOfBoundsException if the {@code beginIndex} is negative,
   *     or {@code endIndex} is larger than the length of this {@code String},
   *     or {@code beginIndex} is larger than {@code endIndex}.
   * @see String#codePointCount(int, int)
   */
  int codePointCount(int beginIndex, int endIndex);

  /**
   * Typed overload of {@link #codePointCount(int, int)}.
   *
   * @param beginIndex the index to the first {@code char} of the text range.
   * @param endIndex the index after the last {@code char} of the text range.
   * @return a new {@code Count} wrapping the value returned by
   *     {@code codePointCount(beginIndex.value(), endIndex.value())}.
   * @throws NullPointerException if either argument is {@code null}.
   */
  default Count codePointCount(StringIndex beginIndex, StringIndex endIndex) {
    return new Count(codePointCount(beginIndex.value(), endIndex.value()));
  }

  /**
   * Returns the index within this {@code String} that is offset from the
   * given {@code index} by {@code codePointOffset} code points. Unpaired
   * surrogates within the text range given by {@code index} and
   * {@code codePointOffset} count as one code point each.
   *
   * @param index the index to be offset
   * @param codePointOffset the offset in code points
   * @return the index within this {@code String}
   * @throws IndexOutOfBoundsException if {@code index} is negative or larger
   *     than the length of this {@code String}, or if {@code codePointOffset}
   *     is positive and the substring starting with {@code index} has fewer
   *     than {@code codePointOffset} code points, or if
   *     {@code codePointOffset} is negative and the substring before
   *     {@code index} has fewer than the absolute value of
   *     {@code codePointOffset} code points.
   * @see String#offsetByCodePoints(int, int)
   */
  int offsetByCodePoints(int index, int codePointOffset);

  /**
   * Typed overload of {@link #offsetByCodePoints(int, int)}.
   *
   * @param index the index to be offset
   * @param codePointOffset the offset in code points
   * @return a new {@code StringIndex} wrapping the value returned by
   *     {@code offsetByCodePoints(index.value(), codePointOffset.value())}.
   * @throws NullPointerException if either argument is {@code null}.
   */
  default StringIndex offsetByCodePoints(StringIndex index, CodePointOffset codePointOffset) {
    return new StringIndex(offsetByCodePoints(index.value(), codePointOffset.value()));
  }

  /**
   * Copies characters from this string into the destination character array.
   *
   * <p>The first character to be copied is at index {@code srcBegin}; the
   * last character to be copied is at index {@code srcEnd-1} (thus the total
   * number of characters to be copied is {@code srcEnd-srcBegin}). The
   * characters are copied into the subarray of {@code dst} starting at index
   * {@code dstBegin} and ending at index:
   * <blockquote><pre>
   *     dstBegin + (srcEnd-srcBegin) - 1
   * </pre></blockquote>
   *
   * @param srcBegin index of the first character in the string to copy.
   * @param srcEnd index after the last character in the string to copy.
   * @param dst the destination array.
   * @param dstBegin the start offset in the destination array.
   * @throws IndexOutOfBoundsException If any of the following is true:
   *     <ul>
   *       <li>{@code srcBegin} is negative.
   *       <li>{@code srcBegin} is greater than {@code srcEnd}
   *       <li>{@code srcEnd} is greater than the length of this string
   *       <li>{@code dstBegin} is negative
   *       <li>{@code dstBegin+(srcEnd-srcBegin)} is larger than
   *           {@code dst.length}
   *     </ul>
   * @see String#getChars(int, int, char[], int)
   */
  void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);

  /**
   * Typed overload of {@link #getChars(int, int, char[], int)}.
   *
   * <p>NOTE(review): {@code dstBegin} is an offset into {@code dst}, not into
   * this string, even though it is typed as a {@code StringIndex}.
   *
   * @param srcBegin index of the first character in the string to copy.
   * @param srcEnd index after the last character in the string to copy.
   * @param dst the destination array.
   * @param dstBegin the start offset in the destination array.
   * @throws NullPointerException if any index argument is {@code null}.
   */
  default void getChars(StringIndex srcBegin, StringIndex srcEnd, char[] dst, StringIndex dstBegin) {
    getChars(srcBegin.value(), srcEnd.value(), dst, dstBegin.value());
  }

  /**
   * Copies characters from this string into the destination byte array. Each
   * byte receives the 8 low-order bits of the corresponding character. The
   * eight high-order bits of each character are not copied and do not
   * participate in the transfer in any way.
   *
   * <p>The first character to be copied is at index {@code srcBegin}; the
   * last character to be copied is at index {@code srcEnd-1}. The total
   * number of characters to be copied is {@code srcEnd-srcBegin}. The
   * characters, converted to bytes, are copied into the subarray of
   * {@code dst} starting at index {@code dstBegin} and ending at index:
   * <blockquote><pre>
   *     dstBegin + (srcEnd-srcBegin) - 1
   * </pre></blockquote>
   *
   * @deprecated This method does not properly convert characters into bytes.
   *     As of JDK 1.1, the preferred way to do this is via the
   *     {@link String#getBytes()} method, which uses the platform's default
   *     charset.
   *
   * @param srcBegin Index of the first character in the string to copy
   * @param srcEnd Index after the last character in the string to copy
   * @param dst The destination array
   * @param dstBegin The start offset in the destination array
   * @throws IndexOutOfBoundsException If any of the following is true:
   *     <ul>
   *       <li>{@code srcBegin} is negative
   *       <li>{@code srcBegin} is greater than {@code srcEnd}
   *       <li>{@code srcEnd} is greater than the length of this String
   *       <li>{@code dstBegin} is negative
   *       <li>{@code dstBegin+(srcEnd-srcBegin)} is larger than
   *           {@code dst.length}
   *     </ul>
   */
  @Deprecated
  void getBytes(int srcBegin, int srcEnd, byte[] dst, int dstBegin);

  /**
   * Typed overload of {@link #getBytes(int, int, byte[], int)}.
   *
   * <p>This delegates to the deprecated {@code int}-based method, so the same
   * caveat applies: only the 8 low-order bits of each character are copied.
   *
   * <p>NOTE(review): {@code dstBegin} is an offset into {@code dst}, not into
   * this string, even though it is typed as a {@code StringIndex}.
   *
   * @param srcBegin Index of the first character in the string to copy
   * @param srcEnd Index after the last character in the string to copy
   * @param dst The destination array
   * @param dstBegin The start offset in the destination array
   * @throws NullPointerException if any index argument is {@code null}.
   */
  default void getBytes(StringIndex srcBegin, StringIndex srcEnd, byte[] dst, StringIndex dstBegin) {
    getBytes(srcBegin.value(), srcEnd.value(), dst, dstBegin.value());
  }

  /**
   * Tests if two string regions are equal.
   *
   * <p>A substring of this {@code String} object is compared to a substring
   * of the argument other. The result is true if these substrings represent
   * identical character sequences. The substring of this {@code String}
   * object to be compared begins at index {@code toffset} and has length
   * {@code len}. The substring of other to be compared begins at index
   * {@code ooffset} and has length {@code len}. The result is {@code false}
   * if and only if at least one of the following is true:
   * <ul>
   *   <li>{@code toffset} is negative.
   *   <li>{@code ooffset} is negative.
   *   <li>{@code toffset+len} is greater than the length of this
   *       {@code String} object.
   *   <li>{@code ooffset+len} is greater than the length of the other
   *       argument.
   *   <li>There is some nonnegative integer <i>k</i> less than {@code len}
   *       such that:
   *       {@code this.charAt(toffset + k) != other.charAt(ooffset + k)}
   * </ul>
   *
   * <p>Note that this method does not take locale into account. The
   * {@link java.text.Collator} class provides locale-sensitive comparison.
   *
   * @param toffset the starting offset of the subregion in this string.
   * @param other the string argument.
   * @param ooffset the starting offset of the subregion in the string
   *     argument.
   * @param len the number of characters to compare.
   * @return {@code true} if the specified subregion of this string exactly
   *     matches the specified subregion of the string argument;
   *     {@code false} otherwise.
   * @see String#regionMatches(int, String, int, int)
   */
  boolean regionMatches(int toffset, String other, int ooffset, int len);

  /**
   * Typed overload of {@link #regionMatches(int, String, int, int)}.
   *
   * @param toffset the starting offset of the subregion in this string.
   * @param other the string argument.
   * @param ooffset the starting offset of the subregion in the string
   *     argument.
   * @param len the number of characters to compare.
   * @return the result of the {@code int}-based method for the unwrapped
   *     arguments.
   * @throws NullPointerException if {@code toffset}, {@code ooffset} or
   *     {@code len} is {@code null}.
   */
  default boolean regionMatches(StringIndex toffset, String other, StringIndex ooffset, Length len) {
    return regionMatches(toffset.value(), other, ooffset.value(), len.value());
  }

  /**
   * Typed overload of {@link #regionMatches(int, String, int, int)} that
   * compares against the text returned by {@code other.sourceAsString()}.
   *
   * @param toffset the starting offset of the subregion in this string.
   * @param other the accessor whose {@code sourceAsString()} is compared.
   * @param ooffset the starting offset of the subregion in the other text.
   * @param len the number of characters to compare.
   * @return the result of the {@code int}-based method for the unwrapped
   *     arguments.
   * @throws NullPointerException if any argument is {@code null}.
   */
  default boolean regionMatches(StringIndex toffset, CodePointAccessor other, StringIndex ooffset, Length len) {
    return regionMatches(toffset.value(), other.sourceAsString(), ooffset.value(), len.value());
  }

  /**
   * Tests if two string regions are equal.
   *
   * <p>A substring of this {@code String} object is compared to a substring
   * of the argument {@code other}. The result is {@code true} if these
   * substrings represent character sequences that are the same, ignoring
   * case if and only if {@code ignoreCase} is true. The substring of this
   * {@code String} object to be compared begins at index {@code toffset} and
   * has length {@code len}. The substring of {@code other} to be compared
   * begins at index {@code ooffset} and has length {@code len}. The result is
   * {@code false} if and only if at least one of the following is true:
   * <ul>
   *   <li>{@code toffset} is negative.
   *   <li>{@code ooffset} is negative.
   *   <li>{@code toffset+len} is greater than the length of this
   *       {@code String} object.
   *   <li>{@code ooffset+len} is greater than the length of the other
   *       argument.
   *   <li>{@code ignoreCase} is {@code false} and there is some nonnegative
   *       integer <i>k</i> less than {@code len} such that:
   *       <blockquote><pre>{@code
   * this.charAt(toffset+k) != other.charAt(ooffset+k)
   *       }</pre></blockquote>
   *   <li>{@code ignoreCase} is {@code true} and there is some nonnegative
   *       integer <i>k</i> less than {@code len} such that:
   *       <blockquote><pre>{@code
   * Character.toLowerCase(Character.toUpperCase(this.charAt(toffset+k))) !=
   *     Character.toLowerCase(Character.toUpperCase(other.charAt(ooffset+k)))
   *       }</pre></blockquote>
   * </ul>
   *
   * <p>Note that this method does not take locale into account, and will
   * result in unsatisfactory results for certain locales when
   * {@code ignoreCase} is {@code true}. The {@link java.text.Collator} class
   * provides locale-sensitive comparison.
   *
   * @param ignoreCase if {@code true}, ignore case when comparing characters.
   * @param toffset the starting offset of the subregion in this string.
   * @param other the string argument.
   * @param ooffset the starting offset of the subregion in the string
   *     argument.
   * @param len the number of characters to compare.
   * @return {@code true} if the specified subregion of this string matches
   *     the specified subregion of the string argument; {@code false}
   *     otherwise. Whether the matching is exact or case insensitive depends
   *     on the {@code ignoreCase} argument.
   * @see String#regionMatches(boolean, int, String, int, int)
   */
  boolean regionMatches(boolean ignoreCase, int toffset, String other, int ooffset, int len);

  /**
   * Typed overload of
   * {@link #regionMatches(boolean, int, String, int, int)}.
   *
   * @param ignoreCase if {@code true}, ignore case when comparing characters.
   * @param toffset the starting offset of the subregion in this string.
   * @param other the string argument.
   * @param ooffset the starting offset of the subregion in the string
   *     argument.
   * @param len the number of characters to compare.
   * @return the result of the {@code int}-based method for the unwrapped
   *     arguments.
   * @throws NullPointerException if {@code toffset}, {@code ooffset} or
   *     {@code len} is {@code null}.
   */
  default boolean regionMatches(boolean ignoreCase, StringIndex toffset, String other, StringIndex ooffset, Length len) {
    return regionMatches(ignoreCase, toffset.value(), other, ooffset.value(), len.value());
  }

  /**
   * Typed overload of
   * {@link #regionMatches(boolean, int, String, int, int)} that compares
   * against the text returned by {@code other.sourceAsString()}.
   *
   * @param ignoreCase if {@code true}, ignore case when comparing characters.
   * @param toffset the starting offset of the subregion in this string.
   * @param other the accessor whose {@code sourceAsString()} is compared.
   * @param ooffset the starting offset of the subregion in the other text.
   * @param len the number of characters to compare.
   * @return the result of the {@code int}-based method for the unwrapped
   *     arguments.
   * @throws NullPointerException if {@code toffset}, {@code other},
   *     {@code ooffset} or {@code len} is {@code null}.
   */
  default boolean regionMatches(boolean ignoreCase, StringIndex toffset, CodePointAccessor other, StringIndex ooffset, Length len) {
    return regionMatches(ignoreCase, toffset.value(), other.sourceAsString(), ooffset.value(), len.value());
  }

  /**
   * Tests if the substring of this string beginning at the specified index
   * starts with the specified prefix.
   *
   * @param prefix the prefix.
   * @param toffset where to begin looking in this string.
   * @return {@code true} if the character sequence represented by the
   *     argument is a prefix of the substring of this object starting at
   *     index {@code toffset}; {@code false} otherwise. The result is
   *     {@code false} if {@code toffset} is negative or greater than the
   *     length of this {@code String} object; otherwise the result is the
   *     same as the result of the expression
   *     <pre>
   *          this.substring(toffset).startsWith(prefix)
   *     </pre>
   * @see String#startsWith(String, int)
   */
  boolean startsWith(String prefix, int toffset);

  /**
   * Typed overload of {@link #startsWith(String, int)}.
   *
   * @param prefix the prefix.
   * @param toffset where to begin looking in this string.
   * @return the result of {@code startsWith(prefix, toffset.value())}.
   * @throws NullPointerException if {@code toffset} is {@code null}.
   */
  default boolean startsWith(String prefix, StringIndex toffset) {
    return startsWith(prefix, toffset.value());
  }

  /**
   * Typed overload of {@link #startsWith(String, int)} that uses the text
   * returned by {@code prefix.sourceAsString()} as the prefix.
   *
   * @param prefix the accessor whose {@code sourceAsString()} is the prefix.
   * @param toffset where to begin looking in this string.
   * @return the result of
   *     {@code startsWith(prefix.sourceAsString(), toffset.value())}.
   * @throws NullPointerException if either argument is {@code null}.
   */
  default boolean startsWith(CodePointAccessor prefix, StringIndex toffset) {
    return startsWith(prefix.sourceAsString(), toffset.value());
  }

  /**
   * Returns the index within this string of the first occurrence of the
   * specified character, starting the search at the specified index.
   *
   * <p>If a character with value {@code ch} occurs in the character sequence
   * represented by this {@code String} object at an index no smaller than
   * {@code fromIndex}, then the index of the first such occurrence is
   * returned. For values of {@code ch} in the range from 0 to 0xFFFF
   * (inclusive), this is the smallest value <i>k</i> such that:
   * <blockquote><pre>{@code
   * (this.charAt(k) == ch) && (k >= fromIndex)
   * }</pre></blockquote>
   * is true. For other values of {@code ch}, it is the smallest value
   * <i>k</i> such that:
   * <blockquote><pre>{@code
   * (this.codePointAt(k) == ch) && (k >= fromIndex)
   * }</pre></blockquote>
   * is true. In either case, if no such character occurs in this string at
   * or after position {@code fromIndex}, then {@code -1} is returned.
   *
   * <p>There is no restriction on the value of {@code fromIndex}. If it is
   * negative, it has the same effect as if it were zero: this entire string
   * may be searched. If it is greater than the length of this string, it has
   * the same effect as if it were equal to the length of this string:
   * {@code -1} is returned.
   *
   * <p>All indices are specified in {@code char} values (Unicode code units).
   *
   * @param ch a character (Unicode code point).
   * @param fromIndex the index to start the search from.
   * @return the index of the first occurrence of the character in the
   *     character sequence represented by this object that is greater than
   *     or equal to {@code fromIndex}, or {@code -1} if the character does
   *     not occur.
   * @see String#indexOf(int, int)
   */
  int indexOf(int ch, int fromIndex);

  /**
   * Typed overload of {@link #indexOf(int, int)}.
   *
   * @param codePoint the character (Unicode code point) to search for.
   * @param fromIndex the index to start the search from.
   * @return a new {@code StringIndexWithNegativeValue} wrapping the value
   *     returned by {@code indexOf(codePoint.value(), fromIndex.value())},
   *     which is {@code -1} when the character does not occur.
   * @throws NullPointerException if either argument is {@code null}.
   */
  default StringIndexWithNegativeValue indexOf(CodePoint codePoint, StringIndex fromIndex) {
    return new StringIndexWithNegativeValue(indexOf(codePoint.value(), fromIndex.value()));
  }

  /**
   * Returns the index within this string of the last occurrence of the
   * specified character, searching backward starting at the specified index.
   * For values of {@code ch} in the range from 0 to 0xFFFF (inclusive), the
   * index returned is the largest value <i>k</i> such that:
   * <blockquote><pre>{@code
   * (this.charAt(k) == ch) && (k <= fromIndex)
   * }</pre></blockquote>
   * is true. For other values of {@code ch}, it is the largest value
   * <i>k</i> such that:
   * <blockquote><pre>{@code
   * (this.codePointAt(k) == ch) && (k <= fromIndex)
   * }</pre></blockquote>
   * is true. In either case, if no such character occurs in this string at
   * or before position {@code fromIndex}, then {@code -1} is returned.
   *
   * <p>All indices are specified in {@code char} values (Unicode code units).
   *
   * @param ch a character (Unicode code point).
   * @param fromIndex the index to start the search from. There is no
   *     restriction on the value of {@code fromIndex}. If it is greater than
   *     or equal to the length of this string, it has the same effect as if
   *     it were equal to one less than the length of this string: this
   *     entire string may be searched. If it is negative, it has the same
   *     effect as if it were -1: -1 is returned.
   * @return the index of the last occurrence of the character in the
   *     character sequence represented by this object that is less than or
   *     equal to {@code fromIndex}, or {@code -1} if the character does not
   *     occur before that point.
   * @see String#lastIndexOf(int, int)
   */
  int lastIndexOf(int ch, int fromIndex);

  /**
   * Typed overload of {@link #lastIndexOf(int, int)}.
   *
   * @param codePoint the character (Unicode code point) to search for.
   * @param fromIndex the index to start the backward search from.
   * @return a new {@code StringIndexWithNegativeValue} wrapping the value
   *     returned by {@code lastIndexOf(codePoint.value(), fromIndex.value())},
   *     which is {@code -1} when the character does not occur.
   * @throws NullPointerException if either argument is {@code null}.
   */
  default StringIndexWithNegativeValue lastIndexOf(CodePoint codePoint, StringIndex fromIndex) {
    return new StringIndexWithNegativeValue(lastIndexOf(codePoint.value(), fromIndex.value()));
  }

  /**
   * Returns the index within this string of the first occurrence of the
   * specified substring, starting at the specified index.
   *
   * <p>The returned index is the smallest value {@code k} for which:
   * <pre>{@code
   *     k >= Math.min(fromIndex, this.length()) &&
   *         this.startsWith(str, k)
   * }</pre>
   * If no such value of {@code k} exists, then {@code -1} is returned.
   *
   * @param str the substring to search for.
   * @param fromIndex the index from which to start the search.
   * @return the index of the first occurrence of the specified substring,
   *     starting at the specified index, or {@code -1} if there is no such
   *     occurrence.
   * @see String#indexOf(String, int)
   */
  int indexOf(String str, int fromIndex);

  /**
   * Typed overload of {@link #indexOf(String, int)} that searches for the
   * text returned by {@code str.sourceAsString()}.
   *
   * @param str the accessor whose {@code sourceAsString()} is searched for.
   * @param fromIndex the index from which to start the search.
   * @return a new {@code StringIndexWithNegativeValue} wrapping the value
   *     returned by {@code indexOf(str.sourceAsString(), fromIndex.value())},
   *     which is {@code -1} when there is no such occurrence.
   * @throws NullPointerException if either argument is {@code null}.
   */
  default StringIndexWithNegativeValue indexOf(CodePointAccessor str, StringIndex fromIndex) {
    return new StringIndexWithNegativeValue(indexOf(str.sourceAsString(), fromIndex.value()));
  }

  /**
   * Returns the index within this string of the last occurrence of the
   * specified substring, searching backward starting at the specified index.
   *
   * <p>The returned index is the largest value {@code k} for which:
   * <pre>{@code
   *     k <= Math.min(fromIndex, this.length()) &&
   *         this.startsWith(str, k)
   * }</pre>
   * If no such value of {@code k} exists, then {@code -1} is returned.
   *
   * @param str the substring to search for.
   * @param fromIndex the index to start the search from.
   * @return the index of the last occurrence of the specified substring,
   *     searching backward from the specified index, or {@code -1} if there
   *     is no such occurrence.
   * @see String#lastIndexOf(String, int)
   */
  int lastIndexOf(String str, int fromIndex);

  /**
   * Returns a string that is a substring of this string. The substring
   * begins with the character at the specified index and extends to the end
   * of this string.
   *
   * <p>Examples:
   * <blockquote><pre>
   * "unhappy".substring(2) returns "happy"
   * "Harbison".substring(3) returns "bison"
   * "emptiness".substring(9) returns "" (an empty string)
   * </pre></blockquote>
   *
   * @param beginIndex the beginning index, inclusive.
   * @return the specified substring.
   * @throws IndexOutOfBoundsException if {@code beginIndex} is negative or
   *     larger than the length of this {@code String} object.
   * @see String#substring(int)
   */
  String substring(int beginIndex);

  /**
   * Returns a string that is a substring of this string. The substring
   * begins at the specified {@code beginIndex} and extends to the character
   * at index {@code endIndex - 1}. Thus the length of the substring is
   * {@code endIndex-beginIndex}.
   *
   * <p>Examples:
   * <blockquote><pre>
   * "hamburger".substring(4, 8) returns "urge"
   * "smiles".substring(1, 5) returns "mile"
   * </pre></blockquote>
   *
   * @param beginIndex the beginning index, inclusive.
   * @param endIndex the ending index, exclusive.
   * @return the specified substring.
   * @throws IndexOutOfBoundsException if the {@code beginIndex} is negative,
   *     or {@code endIndex} is larger than the length of this {@code String}
   *     object, or {@code beginIndex} is larger than {@code endIndex}.
   * @see String#substring(int, int)
   */
  String substring(int beginIndex, int endIndex);

  /**
   * Returns a character sequence that is a subsequence of this sequence.
   *
   * <p>An invocation of this method of the form
   * <blockquote><pre>
   * str.subSequence(begin, end)</pre></blockquote>
   * behaves in exactly the same way as the invocation
   * <blockquote><pre>
   * str.substring(begin, end)</pre></blockquote>
   *
   * <p>Note: in {@link String} this method is defined so that the
   * {@code String} class can implement the {@link CharSequence} interface.
   *
   * @param beginIndex the begin index, inclusive.
   * @param endIndex the end index, exclusive.
   * @return the specified subsequence.
   * @throws IndexOutOfBoundsException if {@code beginIndex} or
   *     {@code endIndex} is negative, if {@code endIndex} is greater than
   *     {@code length()}, or if {@code beginIndex} is greater than
   *     {@code endIndex}
   * @see String#subSequence(int, int)
   */
  CharSequence subSequence(int beginIndex, int endIndex);
}