All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.sf.saxon.tinytree.CompressedWhitespace Maven / Gradle / Ivy

package net.sf.saxon.tinytree;

import net.sf.saxon.om.FastStringBuffer;

import java.io.Writer;

/**
 * This class provides a compressed representation of a sequence of whitespace characters. The representation
 * is a sequence of bytes: in each byte the top two bits indicate which whitespace character is used
 * (x9, xA, xD, or x20) and the bottom six bits indicate the number of such characters. A zero byte is a filler.
 * We don't compress the sequence if it would occupy more than 8 bytes, because that's the space we've got available
 * in the TinyTree arrays.
 */

public class CompressedWhitespace implements CharSequence {

    /**
     * The four whitespace characters that can be represented, indexed by the two-bit
     * code held in the top two bits of each byte of the compressed value:
     * 0 = tab (x9), 1 = newline (xA), 2 = carriage return (xD), 3 = space (x20).
     * Declared final because the table is a constant lookup that is never replaced.
     */
    private static final char[] WHITE_CHARS = {0x09, 0x0A, 0x0D, 0x20};

    /**
     * The compressed value. It is read one byte at a time starting from the most
     * significant byte; in each byte the top two bits select an entry of
     * {@link #WHITE_CHARS} and the bottom six bits give the number of repetitions.
     * A zero byte marks the end of the used bytes. The value is assigned once, in the
     * constructor.
     */
    private final long value;

    /**
     * Create a CompressedWhitespace object wrapping an already-compressed value.
     *
     * @param compressedValue the compressed representation, stored exactly as supplied
     */
    public CompressedWhitespace(long compressedValue) {
        this.value = compressedValue;
    }

    /**
     * Attempt to compress a CharSequence
     * @param in the CharSequence to be compressed
     * @return the compressed sequence if it can be compressed; or the original CharSequence otherwise
     */

    public static CharSequence compress(CharSequence in) {
        final int inlen = in.length();
        // An empty sequence has nothing to compress: hand it back unchanged.
        if (inlen == 0) {
            return in;
        }
        // First pass: count the runs of identical characters (outlength) to find out
        // whether the compressed form would fit.
        int runlength = 1;
        int outlength = 0;
        // NOTE(review): the loop header below is corrupted in this copy (text between
        // "i" and "= 0" has been lost, including the declaration of c and the start of
        // the condition) - restore it from the upstream source.
        for (int i=0; i= 0) {
                // A run ends at the last character, when the next character differs, or
                // when the run has reached 63 characters (the largest count that fits in
                // the six count bits of a byte).
                if (i == inlen-1 || c != in.charAt(i+1) || runlength == 63) {
                    runlength = 1;
                    outlength++;
                    // More than 8 runs would need more than 8 bytes: give up and return
                    // the input unchanged.
                    if (outlength > 8) {
                        return in;
                    }
                } else {
                    runlength++;
                }
            } else {
                // NOTE(review): the condition guarding this branch is part of the lost
                // text above; presumably it rejects input containing a character that
                // is not one of the four whitespace characters - confirm.
                return in;
            }
        }
        // Second pass set-up: one output slot per run counted above.
        int ix = 0;
        runlength = 1;
        int[] out = new int[outlength];
        // NOTE(review): the loop header below is corrupted in this copy. The code that
        // follows reads "value" and "buffer", neither of which is declared in the visible
        // part of compress(), so the rest of compress() and the header of an
        // uncompress method have presumably been lost here - confirm against upstream.
        for (int i=0; i=0; s-=8) {
            // Take the next byte, most significant first; a zero byte ends the data.
            byte b = (byte)((value >>>s) & 0xff);
            if (b == 0) {
                break;
            }
            // Top two bits select the whitespace character, bottom six bits the run length.
            char c = WHITE_CHARS[b>>>6 & 0x3];
            int len = (b & 0x3f);
            buffer.ensureCapacity(len);
            // NOTE(review): the loop header below is corrupted in this copy. The code
            // that follows adds up the six-bit counts of each byte of "val" and returns
            // the total, so it is presumably the tail of the length() method - confirm.
            for (int j=0; j=0; s-=8) {
            int c = (int)((val>>>s) & 0x3f);
            if (c == 0) {
                break;
            }
            count += c;
        }
        return count;
    }

    /**
     * Returns the char value at the specified index.  An index ranges from zero
     * to length() - 1.  The first char value of the sequence is at
     * index zero, the next at index one, and so on, as for array
     * indexing.
     *
     * <p>If the char value specified by the index is a
     * surrogate, the surrogate
     * value is returned.
     *
     * <p>The character is found by walking the bytes of the compressed value from the
     * most significant byte downwards, accumulating the run lengths until the running
     * total exceeds the requested index.
     *
     * @param index the index of the char value to be returned
     * @return the specified char value
     * @throws IndexOutOfBoundsException if the index argument is negative or not less than
     *                                   length()
     */
    public char charAt(int index) {
        if (index < 0) {
            // Without this guard a negative index would satisfy "count > index" on the
            // first non-empty run and silently return that run's character.
            throw new IndexOutOfBoundsException(index+"");
        }
        int count = 0;
        final long val = value;
        for (int s=56; s>=0; s-=8) {
            byte b = (byte)((val>>>s) & 0xff);
            if (b == 0) {
                // A zero byte is filler: no more runs follow.
                break;
            }
            // Bottom six bits hold the length of this run.
            count += (b & 0x3f);
            if (count > index) {
                // The index falls inside this run; the top two bits select its character.
                return WHITE_CHARS[b>>>6 & 0x3];
            }
        }
        throw new IndexOutOfBoundsException(index+"");
    }

    /**
     * Returns a new CharSequence that is a subsequence of this sequence.
     * The subsequence starts with the char value at the specified index and
     * ends with the char value at index end - 1. The length
     * (in chars) of the
     * returned sequence is end - start, so if start == end
     * then an empty sequence is returned.

* * @param start the start index, inclusive * @param end the end index, exclusive * @return the specified subsequence * @throws IndexOutOfBoundsException if start or end are negative, * if end is greater than length(), * or if start is greater than end */
    public CharSequence subSequence(int start, int end) {
        // Expand the compressed value, then take the subsequence of the expanded form.
        return uncompress(null).subSequence(start, end);
    }

    /**
     * Indicates whether some other object is "equal to" this one.
     * Two CompressedWhitespace objects are equal when their compressed values are
     * identical. For any other argument the comparison is delegated to the equals()
     * method of the uncompressed form of this object.
     * NOTE(review): whether that delegated comparison matches other CharSequence types
     * depends on the class returned by uncompress(), which is not visible here.
     *
     * @param obj the object to compare with
     * @return true if the objects are considered equal
     */
    public boolean equals(Object obj) {
        if (obj instanceof CompressedWhitespace) {
            return value == ((CompressedWhitespace)obj).value;
        }
        return uncompress(null).equals(obj);
    }

    /**
     * Returns a hash code value for the object, computed as the hash code of the
     * uncompressed form.
     */
    public int hashCode() {
        return uncompress(null).hashCode();
    }

    /**
     * Returns a string representation of the object, obtained by converting the
     * uncompressed form to a string.
     */
    public String toString() {
        return uncompress(null).toString();
    }

    /**
     * Write the value to a Writer
     *
     * @param writer the destination
     * @throws java.io.IOException declared by the method signature
     */
    public void write(Writer writer) throws java.io.IOException {
        final long val = value;
        // Walk the bytes from the most significant downwards; a zero byte ends the data.
        for (int s=56; s>=0; s-=8) {
            final byte b = (byte)((val>>>s) & 0xff);
            if (b == 0) {
                break;
            }
            // Top two bits select the whitespace character, bottom six bits the run length.
            final char c = WHITE_CHARS[b>>>6 & 0x3];
            final int len = (b & 0x3f);
            // NOTE(review): the loop header below is corrupted in this copy. The code
            // after it reads "specialChars", which write() does not declare, so the end
            // of write() and the header of a following method (presumably an escaping
            // variant of write) have been lost here - restore from the upstream source.
            for (int j=0; j=0; s-=8) {
                final byte b = (byte)((val>>>s) & 0xff);
                if (b == 0) {
                    break;
                }
                final char c = WHITE_CHARS[b>>>6 & 0x3];
                final int len = (b & 0x3f);
                if (specialChars[c]) {
                    // NOTE(review): the three replacement strings below are identical
                    // single spaces in this copy; they look like character references
                    // that were decoded during extraction - verify against upstream.
                    String e = "";
                    if (c=='\n') {
                        e = " ";
                    } else if (c=='\r') {
                        e = " ";
                    } else if (c=='\t') {
                        e = " ";
                    }
                    // NOTE(review): the source is truncated in the middle of the next
                    // loop header; nothing after this point is available in this copy.
                    for (int j=0; j




© 2015 - 2025 Weber Informatics LLC | Privacy Policy