net.sf.saxon.tinytree.LargeStringBuffer Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of saxon9 Show documentation
Provides a basic XSLT 2.0 and XQuery 1.0 processor (W3C Recommendations, January 2007). Command line interfaces and implementations of several Java APIs (DOM, XPath, s9api) are also included.
The newest version!
package net.sf.saxon.tinytree;

import net.sf.saxon.om.FastStringBuffer;

import java.io.Writer;
import java.io.Serializable;
import java.util.List;
import java.util.ArrayList;
import java.util.Arrays;

/**
 * This is an implementation of the JDK 1.4 CharSequence interface: it implements
 * a CharSequence as a list of arrays of characters (the individual arrays are known
 * as segments). When characters are appended, a new segment is started if the previous
 * array would otherwise overflow a threshold size (the maxAllocation size).
 * 
 * This is more efficient than a buffer backed by a contiguous array of characters
 * in cases where the size is likely to grow very large, and where substring operations
 * are rare. As used within the TinyTree, the value of each text node is contiguous within
 * one segment, so extraction of the value of a text node is efficient.
 */

public final class LargeStringBuffer implements CharSequence, Serializable {

    // TODO:PERF with large documents the Arrays.binarySearch() cost can be noticeable.
    // It would be better if TinyTree addressed into this structure using segment+offset addressing.
    // 16 bits for each would do fine.

    // Allocation size requested for each new segment; the constructor caps it at maxAllocation,
    // and append() requests more when a single appended chunk is larger than this.
    private int minAllocation;
    // Size threshold for a segment: append() opens a new segment instead of growing the
    // last one beyond this many characters.
    private int maxAllocation;
    // segments and startOffsets are parallel: entry i of startOffsets describes segment i.
    private List segments;      // each segment is a FastStringBuffer
    // startOffsets is in ascending order; charAt() and subSequence() search it with Arrays.binarySearch().
    private int[] startOffsets; // if startOffsets[23] is 123456, then the first
                                // character in segment 23 is the 123456'th character
                                // of the CharSequence value.
    private int length;         // total length of the CharSequence

   /**
    * Create an empty LargeStringBuffer with default space allocation
    */

    public LargeStringBuffer() {
        // Defaults: 4096 (1 << 12) as the minimum allocation, 65536 (1 << 16) as the maximum.
        this(1 << 12, 1 << 16);
    }

   /**
    * Create an empty LargeStringBuffer
    * @param minAllocation initial allocation size for each segment (including the first). If minAllocation
    * exceeds maxAllocation, it is rounded down to the value of maxAllocation
    * @param maxAllocation maximum allocation size for each segment. When a segment reaches this
    * size, a new segment is created rather than appending more characters to the existing segment.
    * However, a segment may have size greater than maxAllocation if a single chunk of data larger than
    * maxAllocation is appended.
    */
    public LargeStringBuffer(int minAllocation, int maxAllocation) {
        // Clamp first, so that a minAllocation larger than maxAllocation is rounded down.
        this.minAllocation = Math.min(minAllocation, maxAllocation);
        this.maxAllocation = maxAllocation;
        // Start with one empty segment at offset zero. It is sized from the clamped field rather
        // than from the raw parameter, so the first segment obeys the same cap as later ones.
        segments = new ArrayList(4);
        segments.add(new FastStringBuffer(this.minAllocation));
        startOffsets = new int[] {0};
        length = 0;
    }

    /**
     * Append a CharSequence to this LargeStringBuffer
     * @param data the data to be appended
     */

    public void append(CharSequence data) {
        final int increment = data.length();
        if (increment == 0) {
            // nothing to add; in particular, never open a segment for empty data
            return;
        }
        FastStringBuffer last = (FastStringBuffer)segments.get(segments.size() - 1);
        final int used = last.length();
        // Keep filling the current segment while the data fits. An empty segment always takes the
        // data, whatever its size: opening a second segment behind an empty one would give both the
        // same start offset, and Arrays.binarySearch() does not say which of several equal keys it
        // finds, so a lookup could land on the empty segment. The test is written as a subtraction
        // so that a very large increment cannot overflow the int sum.
        if (used == 0 || increment <= maxAllocation - used) {
            last.append(data);
        } else {
            // Open a new segment that begins at the current end of the sequence. It is sized to
            // take the whole chunk, even when the chunk is larger than maxAllocation.
            int[] s2 = new int[startOffsets.length + 1];
            System.arraycopy(startOffsets, 0, s2, 0, startOffsets.length);
            s2[startOffsets.length] = length;
            startOffsets = s2;
            last = new FastStringBuffer(Math.max(minAllocation, increment));
            segments.add(last);
            last.append(data);
        }
        length += increment;
    }

    /**
     * Returns the length of this character sequence.  The length is the number
     * of 16-bit UTF-16 characters in the sequence. 
     *
     * @return  the number of characters in this sequence
     */
    public int length() {
        // append() keeps this running total up to date, so the segments need not be visited here
        return this.length;
    }

    /**
     * Returns the character at the specified index.  An index ranges from zero
     * to length() - 1.  The first character of the sequence is at
     * index zero, the next at index one, and so on, as for array
     * indexing. 
     *
     * @param   index   the index of the character to be returned
     *
     * @return  the specified character
     *
     * @throws  IndexOutOfBoundsException
     *          if the index argument is negative or not less than
     *          length()
     */
    public char charAt(int index) {
        if (startOffsets.length == 1) {
            // optimize for small documents: everything is in the only segment, and range
            // checking of the index is left to that segment
            return ((FastStringBuffer)segments.get(0)).charAt(index);
        }
        if (index < 0 || index >= length) {
            throw new IndexOutOfBoundsException(index+"");
        }
        int seg = Arrays.binarySearch(startOffsets, index);
        if (seg >= 0) {
            // The index is exactly the start offset of a segment. If an empty segment shares that
            // offset with its successor, binarySearch() may have returned either of them, so move
            // forward to the last segment with this offset: that is the one holding the character.
            while (seg + 1 < startOffsets.length && startOffsets[seg + 1] == index) {
                seg++;
            }
            return ((FastStringBuffer)segments.get(seg)).charAt(0);
        }
        // Not an exact hit: binarySearch() returned -(insertionPoint) - 1, and the character lies in
        // the segment just before the insertion point.
        seg = -seg - 2;
        final int offset = index - startOffsets[seg];
        return ((FastStringBuffer)segments.get(seg)).charAt(offset);
    }

    /**
     * Returns a new character sequence that is a subsequence of this sequence.
     * The subsequence starts with the character at the specified index and
     * ends with the character at index end - 1.  The length of the
     * returned sequence is end - start, so if start == end
     * then an empty sequence is returned. 
     *
     * @param   start   the start index, inclusive
     * @param   end     the end index, exclusive
     *
     * @return  the specified subsequence
     *
     * @throws  IndexOutOfBoundsException
     *          if start or end are negative,
     *          if end is greater than length(),
     *          or if start is greater than end
     */
    public CharSequence subSequence(int start, int end) {
        if (startOffsets.length == 1) {
            // optimize for small documents
            return ((FastStringBuffer)segments.get(0)).subSequence(start, end);
        }
        if (start < 0 || end < 0 || end > length || start > end) {
            throw new IndexOutOfBoundsException("[" + start + ',' + end + ']');
        }
        int seg0 = Arrays.binarySearch(startOffsets, start);
        int offset0;
        if (seg0 >= 0) {
            offset0 = 0;
        } else {
            seg0 = -seg0 - 2;
            offset0 = start - startOffsets[seg0];
        }
        int seg1 = Arrays.binarySearch(startOffsets, end);
        int offset1;
        if (seg1 >= 0) {
            offset1 = 0;
        } else {
            seg1 = -seg1 - 2;
            offset1 = end - startOffsets[seg1];
        }
        FastStringBuffer startSegment = (FastStringBuffer)segments.get(seg0);
        // We've had reports (28 Feb 2007) of an NPE here, which we couldn't reproduce.
        // The following code is designed to produce diagnostics if it ever happens again
        if (startSegment == null) {
            dumpDataStructure();
            throw new NullPointerException("startSegment: subSequence(" + start + ", " + end + ")");
        }
        if (seg0 == seg1) {
            // the required substring is all in one segment
            return startSegment.subSequence(offset0, offset1);
        } else {
            // copy the data into a new FastStringBuffer. This case should be exceptional
            FastStringBuffer sb = new FastStringBuffer(end - start);
            sb.append(startSegment.subSequence(offset0, startSegment.length()));
            for (int i=seg0+1; i 0) {
                sb.append(((FastStringBuffer)segments.get(seg1)).subSequence(0, offset1));
            }
            return sb;
        }
    }

    /**
     * Convert to a string
     */

    public String toString() {
        if (startOffsets.length == 1) {
            // optimize for small documents
            return segments.get(0).toString();
        }
        FastStringBuffer sb = new FastStringBuffer(length);
        for (int i=0; i