// net.sf.saxon.regex.GeneralUnicodeString (Maven / Gradle / Ivy)
// Show all versions of Saxon-HE; show documentation
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2015 Saxonica Limited.
// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
// If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
// This Source Code Form is "Incompatible With Secondary Licenses", as defined by the Mozilla Public License, v. 2.0.
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
package net.sf.saxon.regex;
import net.sf.saxon.value.*;
/**
* A Unicode string which, in general, may contain non-BMP characters (that is, codepoints
* outside the range 0-65535)
*/
public final class GeneralUnicodeString extends UnicodeString {

    /** Backing array of codepoints. Instances created by {@link #uSubstring} share this array. */
    private final int[] chars;

    /** Offset into {@code chars} of the first codepoint of this string (inclusive). */
    private final int start;

    /** Offset into {@code chars} just past the last codepoint of this string (exclusive). */
    private final int end;

    /** UTF-16 form of this string; computed on demand by {@link #obtainCharSequence()}. */
    private CharSequence charSequence;

    /**
     * Create a Unicode string from a CharSequence. The supplied sequence is expanded
     * into an array of codepoints, and is also retained as the UTF-16 form used by
     * {@link #length()}, {@link #charAt(int)} and {@link #subSequence(int, int)}.
     *
     * @param in the character sequence to be wrapped
     */
    public GeneralUnicodeString(CharSequence in) {
        chars = net.sf.saxon.value.StringValue.expand(in);
        start = 0;
        end = chars.length;
        charSequence = in;
    }

    /**
     * Create a Unicode string as a view over part of an existing codepoint array.
     * The array is not copied.
     *
     * @param chars the codepoint array
     * @param start offset of the first codepoint of the view (inclusive)
     * @param end   offset just past the last codepoint of the view (exclusive)
     */
    GeneralUnicodeString(int[] chars, int start, int end) {
        this.chars = chars;
        this.start = start;
        this.end = end;
    }

    /**
     * Get a substring of this string, with indexes counted in codepoints relative
     * to the start of this string. The result shares the underlying codepoint array.
     *
     * @param beginIndex index of the first codepoint to include
     * @param endIndex   index just past the last codepoint to include
     * @return the requested substring
     * @throws IndexOutOfBoundsException if endIndex exceeds the length of this string,
     *                                   or if beginIndex is negative or greater than endIndex
     */
    public UnicodeString uSubstring(int beginIndex, int endIndex) {
        // Validate against the length of this view, not of the whole backing array:
        // otherwise a substring of a substring could reach past its own end.
        int size = uLength();
        if (endIndex > size) {
            throw new IndexOutOfBoundsException("endIndex=" + endIndex
                    + "; sequence size=" + size);
        }
        if (beginIndex < 0 || beginIndex > endIndex) {
            throw new IndexOutOfBoundsException("beginIndex=" + beginIndex
                    + "; endIndex=" + endIndex);
        }
        return new GeneralUnicodeString(chars, start + beginIndex, start + endIndex);
    }

    /**
     * Get the codepoint at a given position. No check is made that the position
     * lies within this string; the backing array is indexed directly.
     *
     * @param pos the position, in codepoints, relative to the start of this string
     * @return the codepoint at that position
     */
    public int uCharAt(int pos) {
        return chars[start + pos];
    }

    /**
     * Find the first occurrence of a codepoint at or after a given position.
     *
     * @param search the codepoint to look for
     * @param pos    the position at which to start searching; a negative value is
     *               treated as zero
     * @return the position of the first match, relative to the start of this string,
     *         or -1 if the codepoint is not found
     */
    public int uIndexOf(int search, int pos) {
        int length = uLength();
        // Clamp so that a negative position cannot index before the start of this view.
        for (int i = Math.max(pos, 0); i < length; i++) {
            if (chars[start + i] == search) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Get the length of this string in codepoints.
     *
     * @return the number of codepoints
     */
    public int uLength() {
        return end - start;
    }

    /**
     * Test whether a position is at or beyond the end of this string.
     *
     * @param pos the position, in codepoints, relative to the start of this string
     * @return true if pos is greater than or equal to the length in codepoints
     */
    public boolean isEnd(int pos) {
        return pos >= (end - start);
    }

    /**
     * Convert this string to a java.lang.String. The result is computed afresh on
     * each call and is not cached.
     *
     * @return the string value
     */
    public String toString() {
        return contractView().toString();
    }

    /**
     * Get a CharSequence representing this string. This is a memo function; the result is saved for
     * use if needed again. The CharSequence returned is one that has efficient support for operations
     * such as charAt(p) where non-BMP characters are represented as surrogate pairs.
     *
     * @return a CharSequence representing the same string, with efficient positional access to
     *         UTF16 codepoints.
     */
    private CharSequence obtainCharSequence() {
        if (charSequence == null) {
            charSequence = contractView();
        }
        return charSequence;
    }

    /**
     * Contract the codepoints of this view into a CharSequence. The codepoints are
     * first copied into a zero-based array unless the view already starts at offset zero.
     *
     * @return the result of StringValue.contract applied to the codepoints of this string
     */
    private CharSequence contractView() {
        int size = end - start;
        int[] c = chars;
        if (start != 0) {
            c = new int[size];
            System.arraycopy(chars, start, c, 0, size);
        }
        return StringValue.contract(c, size);
    }

    /**
     * Returns the length of this character sequence. The length is the number
     * of 16-bit <code>char</code>s in the sequence.
     *
     * @return the number of <code>char</code>s in this sequence
     */
    public int length() {
        return obtainCharSequence().length();
    }

    /**
     * Returns the <code>char</code> value at the specified index. An index ranges from zero
     * to <code>length() - 1</code>. The first <code>char</code> value of the sequence is at
     * index zero, the next at index one, and so on, as for array
     * indexing.
     *
     * <p>If the <code>char</code> value specified by the index is a
     * surrogate, the surrogate value is returned.</p>
     *
     * @param index the index of the <code>char</code> value to be returned
     * @return the specified <code>char</code> value
     * @throws IndexOutOfBoundsException if the <code>index</code> argument is negative or not less than
     *                                   <code>length()</code>
     */
    public char charAt(int index) {
        return obtainCharSequence().charAt(index);
    }

    /**
     * Returns a new <code>CharSequence</code> that is a subsequence of this sequence.
     * The subsequence starts with the <code>char</code> value at the specified index and
     * ends with the <code>char</code> value at index <code>end - 1</code>. The length
     * (in <code>char</code>s) of the
     * returned sequence is <code>end - start</code>, so if <code>start == end</code>
     * then an empty sequence is returned.
     *
     * @param start the start index, inclusive
     * @param end   the end index, exclusive
     * @return the specified subsequence
     * @throws IndexOutOfBoundsException if <code>start</code> or <code>end</code> are negative,
     *                                   if <code>end</code> is greater than <code>length()</code>,
     *                                   or if <code>start</code> is greater than <code>end</code>
     */
    public CharSequence subSequence(int start, int end) {
        return obtainCharSequence().subSequence(start, end);
    }
}