
// net.sf.saxon.regex.BMPString - Maven / Gradle / Ivy
// Show all versions of Saxon-HE - Show documentation
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2015 Saxonica Limited.
// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
// If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
// This Source Code Form is "Incompatible With Secondary Licenses", as defined by the Mozilla Public License, v. 2.0.
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
package net.sf.saxon.regex;
/**
 * An implementation of UnicodeString optimized for strings that contain
 * no characters outside the BMP (i.e. no characters whose codepoints exceed 65535).
 *
 * <p>Every operation delegates to the wrapped {@link CharSequence}. Provided the
 * constructor contract is respected (no surrogate pairs), each character occupies
 * exactly one 16-bit {@code char}, so code point positions and {@code char}
 * positions are the same.</p>
 */
public final class BMPString extends UnicodeString {

    private final CharSequence src;

    /**
     * Create a BMPString
     *
     * @param src - encapsulated CharSequence.
     *            The client must ensure that this contains no surrogate pairs, and that
     *            it is immutable
     */
    public BMPString(CharSequence src) {
        this.src = src;
    }

    /**
     * Get a substring of this string, wrapped as a new BMPString.
     *
     * @param beginIndex the start position, inclusive
     * @param endIndex   the end position, exclusive
     * @return a new BMPString wrapping
     *         {@code subSequence(beginIndex, endIndex)} of the underlying CharSequence
     */
    public UnicodeString uSubstring(int beginIndex, int endIndex) {
        return new BMPString(src.subSequence(beginIndex, endIndex));
    }

    /**
     * Get the character at a given position, as an integer codepoint.
     *
     * @param pos the position of the required character
     * @return the {@code char} at that position in the underlying CharSequence,
     *         widened to an {@code int}
     */
    public int uCharAt(int pos) {
        return src.charAt(pos);
    }

    /**
     * Find the first occurrence of a codepoint at or after a given position.
     *
     * @param search the codepoint to look for
     * @param pos    the position at which the search starts
     * @return the position of the first match at or after {@code pos}, or -1 if
     *         there is none. Always -1 when {@code search} lies outside the range
     *         0 to 65535, since such a value cannot occur in a BMP-only string.
     */
    public int uIndexOf(int search, int pos) {
        // Reject out-of-range values before the narrowing cast below: the cast wraps
        // modulo 2^16, so a negative value such as -1 would otherwise turn into U+FFFF
        // and could produce a false match.
        if (search < 0 || search > Character.MAX_VALUE) {
            return -1;
        }
        final char target = (char) search;
        // The wrapped sequence is required to be immutable, so its length is loop-invariant.
        final int len = src.length();
        for (int i = pos; i < len; i++) {
            if (src.charAt(i) == target) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Get the length of the string.
     *
     * @return the length of the underlying CharSequence
     */
    public int uLength() {
        return src.length();
    }

    /**
     * Test whether a position is at or beyond the end of the string.
     *
     * @param pos the position to test
     * @return true if {@code pos} is greater than or equal to the length of the
     *         underlying CharSequence
     */
    public boolean isEnd(int pos) {
        return pos >= src.length();
    }

    /**
     * Get the content as a String.
     *
     * @return the result of calling {@code toString()} on the underlying CharSequence
     */
    public String toString() {
        return src.toString();
    }

    /**
     * Get the underlying CharSequence
     *
     * @return the underlying CharSequence
     */
    public CharSequence getCharSequence() {
        return src;
    }

    /**
     * Returns the length of this character sequence. The length is the number
     * of 16-bit {@code char}s in the sequence.
     *
     * @return the number of {@code char}s in this sequence
     */
    public int length() {
        return src.length();
    }

    /**
     * Returns the {@code char} value at the specified index. An index ranges from zero
     * to {@code length() - 1}. The first {@code char} value of the sequence is at
     * index zero, the next at index one, and so on, as for array
     * indexing.
     *
     * <p>If the {@code char} value specified by the index is a
     * surrogate, the surrogate value is returned.</p>
     *
     * @param index the index of the {@code char} value to be returned
     * @return the specified {@code char} value
     * @throws IndexOutOfBoundsException if the index argument is negative or not less than
     *                                   {@code length()}
     */
    public char charAt(int index) {
        return src.charAt(index);
    }

    /**
     * Returns a new {@code CharSequence} that is a subsequence of this sequence.
     * The subsequence starts with the {@code char} value at the specified index and
     * ends with the {@code char} value at index {@code end - 1}. The length
     * (in {@code char}s) of the
     * returned sequence is {@code end - start}, so if {@code start == end}
     * then an empty sequence is returned.
     *
     * @param start the start index, inclusive
     * @param end   the end index, exclusive
     * @return the specified subsequence
     * @throws IndexOutOfBoundsException if start or end are negative,
     *                                   if end is greater than {@code length()},
     *                                   or if start is greater than end
     */
    public CharSequence subSequence(int start, int end) {
        return src.subSequence(start, end);
    }
}