All downloads are free. Search and download functionality uses the official Maven repository.

net.sf.saxon.regex.GeneralUnicodeString Maven / Gradle / Ivy

There is a newer version: 12.5
Show newest version
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2015 Saxonica Limited.
// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
// If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
// This Source Code Form is "Incompatible With Secondary Licenses", as defined by the Mozilla Public License, v. 2.0.
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

package net.sf.saxon.regex;

import net.sf.saxon.value.*;

/**
 * A Unicode string which, in general, may contain non-BMP characters (that is, codepoints
 * outside the range 0-65535)
 */

public final class GeneralUnicodeString extends UnicodeString {

    private int[] chars;
    private int start;
    private int end;
    private CharSequence charSequence;

    public GeneralUnicodeString(CharSequence in) {
        chars = net.sf.saxon.value.StringValue.expand(in);
        start = 0;
        end = chars.length;
        charSequence = in;
    }

    GeneralUnicodeString(int[] chars, int start, int end) {
        this.chars = chars;
        this.start = start;
        this.end = end;
    }

    public UnicodeString uSubstring(int beginIndex, int endIndex) {
        if (endIndex > chars.length) {
            throw new IndexOutOfBoundsException("endIndex=" + endIndex
                    + "; sequence size=" + chars.length);
        }
        if (beginIndex < 0 || beginIndex > endIndex) {
            throw new IndexOutOfBoundsException("beginIndex=" + beginIndex
                    + "; endIndex=" + endIndex);
        }
        return new GeneralUnicodeString(chars, start + beginIndex, start + endIndex);
    }

    public int uCharAt(int pos) {
        return chars[start + pos];
    }

    public int uIndexOf(int search, int pos) {
        for (int i = pos; i < uLength(); i++) {
            if (chars[start + i] == search) {
                return i;
            }
        }
        return -1;
    }

    public int uLength() {
        return end - start;
    }

    public boolean isEnd(int pos) {
        return pos >= (end - start);
    }

    public String toString() {
        int[] c = chars;
        if (start != 0) {
            c = new int[end - start];
            System.arraycopy(chars, start, c, 0, end - start);
        }
        return StringValue.contract(c, end - start).toString();
    }

    /**
     * Get a CharSequence representing this string. This is a memo function; the result is saved for
     * use if needed again. The CharSequence returned is one that has efficient support for operations
     * such as charAt(p) where non-BMP characters are represented as surrogate pairs.
     * @return a CharSequence representing the same string, with efficient positional access to
     * UTF16 codepoints.
     */

    private CharSequence obtainCharSequence() {
        if (charSequence == null) {
            int[] c = chars;
            if (start != 0) {
                c = new int[end - start];
                System.arraycopy(chars, start, c, 0, end - start);
            }
            charSequence = StringValue.contract(c, end - start);
        }
        return charSequence;
    }

    /**
     * Returns the length of this character sequence.  The length is the number
     * of 16-bit chars in the sequence.

* * @return the number of chars in this sequence */ public int length() { return obtainCharSequence().length(); } /** * Returns the char value at the specified index. An index ranges from zero * to length() - 1. The first char value of the sequence is at * index zero, the next at index one, and so on, as for array * indexing.

*

*

If the char value specified by the index is a * surrogate, the surrogate * value is returned. * * @param index the index of the char value to be returned * @return the specified char value * @throws IndexOutOfBoundsException if the index argument is negative or not less than * length() */ public char charAt(int index) { return obtainCharSequence().charAt(index); } /** * Returns a new CharSequence that is a subsequence of this sequence. * The subsequence starts with the char value at the specified index and * ends with the char value at index end - 1. The length * (in chars) of the * returned sequence is end - start, so if start == end * then an empty sequence is returned.

* * @param start the start index, inclusive * @param end the end index, exclusive * @return the specified subsequence * @throws IndexOutOfBoundsException if start or end are negative, * if end is greater than length(), * or if start is greater than end */ public CharSequence subSequence(int start, int end) { return obtainCharSequence().subSequence(start, end); } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy