// Source listing of net.sf.saxon.value.Whitespace, taken from a Maven / Gradle / Ivy
// artifact download page.

package net.sf.saxon.value;

import net.sf.saxon.om.FastStringBuffer;
import net.sf.saxon.tinytree.CompressedWhitespace;

/**
 * This class provides helper methods and constants for handling whitespace
 */
public class Whitespace {

    private Whitespace() {}


    /**
     * The values PRESERVE, REPLACE, and COLLAPSE represent the three options for whitespace
     * normalization. They are deliberately chosen in ascending strength order; given a number
     * of whitespace facets, only the strongest needs to be carried out.
     */

    public static final int PRESERVE = 0;
    public static final int REPLACE = 1;
    public static final int COLLAPSE = 2;

    /**
     * The values NONE, IGNORABLE, and ALL identify which kinds of whitespace text node
     * should be stripped when building a source tree. UNSPECIFIED indicates that no
     * particular request has been made. XSLT indicates that whitespace should be stripped
     * as defined by the xsl:strip-space and xsl:preserve-space declarations in the stylesheet
     */

    public static final int NONE = 0;
    public static final int IGNORABLE = 1;
    public static final int ALL = 2;
    public static final int UNSPECIFIED = 3;
    public static final int XSLT = 4;

    /**
     * Test whether a character is whitespace
     * @param ch the character (Unicode codepoint) to be tested
     * @return true if the character is one of tab, newline, carriage return, or space
     */

    public static boolean isWhitespace(int ch) {
        switch (ch) {
            case 9:
            case 10:
            case 13:
            case 32:
                return true;
            default:
                return false;
        }
    }

    /**
     * Apply schema-defined whitespace normalization to a string
     * @param action the action to be applied: one of PRESERVE, REPLACE, or COLLAPSE
     * @param value the value to be normalized
     * @return the value after normalization
     */

    public static CharSequence applyWhitespaceNormalization(int action, CharSequence value) {
        switch (action) {
            case PRESERVE:
                return value;
            case REPLACE:
                FastStringBuffer sb = new FastStringBuffer(value.length());
                for (int i=0; i 32 || !C0WHITE[c]) {
                    sb.append(c);
                }
            }
            return sb;
        } else {
            return value;
        }
    }

    /**
     * Remove leading whitespace characters from a string
     * @param value the string whose leading whitespace is to be removed
     * @return the string with leading whitespace removed. This may be the
     * original string if there was no leading whitespace
     */

    public static CharSequence removeLeadingWhitespace(CharSequence value) {
        int start = -1;
        final int len = value.length();
        for (int i=0; i 32 || !C0WHITE[c]) {
                start = i;
                break;
            }
        }
        if (start == 0) {
            return value;
        } else if (start < 0 || start == len - 1) {
            return "";
        } else {
            return value.subSequence(start, len);
        }
    }

    /**
     * Determine if a string contains any whitespace
     * @param value the string to be tested
     * @return true if the string contains a character that is XML whitespace, that is
     * tab, newline, carriage return, or space
     */

    public static boolean containsWhitespace(CharSequence value) {
        final int len = value.length();
        for (int i=0; i 32 || !C0WHITE[c]) {
                return false;
            }
        }
        return true;
    }

    private static boolean[] C0WHITE = {
        false, false, false, false, false, false, false, false,  // 0-7
        false, true, true, false, false, true, false, false,     // 8-15
        false, false, false, false, false, false, false, false,  // 16-23
        false, false, false, false, false, false, false, false,  // 24-31
        true                                                     // 32
    };

    /**
     * Normalize whitespace as defined in XML Schema. Note that this is not the same
     * as the XPath normalize-space() function, which is supported by the
     * {@link #collapseWhitespace} method
     * @param in the string to be normalized
     * @return a copy of the string in which any whitespace character is replaced by
     * a single space character
    */

    public static CharSequence normalizeWhitespace(CharSequence in) {
        FastStringBuffer sb = new FastStringBuffer(in.length());
        for (int i=0; i0 && sb.charAt(nlen-1)==' ') {
            sb.setLength(nlen-1);
        }
        return sb;
    }

    /**
     * Remove leading and trailing whitespace. This has the same effect as collapseWhitespace,
     * but is cheaper, for use by data types that do not allow internal whitespace.
     * @param in the input string whose whitespace is to be removed
     * @return the result of removing excess whitespace
     */
    public static CharSequence trimWhitespace(CharSequence in) {
        if (in.length()==0) {
            return in;
        }
        int first = 0;
        int last = in.length()-1;
        while (true) {
            final char x = in.charAt(first);
            if (x > 32 || !C0WHITE[x]) {
                break;
            }
            if (first++ >= last) {
                return "";
            }
        }
        while (true) {
            final char x = in.charAt(last);
            if (x > 32 || !C0WHITE[x]) {
                break;
            }
            last--;
        }
        if (first == 0 && last == in.length()-1) {
            return in;
        } else {
            return in.subSequence(first, last+1);
        }
    }

    /**
     * Trim leading and trailing whitespace from a string, returning a string.
     * This differs from the Java trim() method in that the only characters treated as
     * whitespace are space, \n, \r, and \t. The String#trim() method removes all C0
     * control characters (which is not the same thing under XML 1.1).
     * @param s the string to be trimmed. If null is supplied, null is returned.
     * @return the string with leading and trailing whitespace removed.
     */

    public static String trim(CharSequence s) {
        if (s == null) {
            return null;
        }
        return trimWhitespace(s).toString();
    }
}
//
// The contents of this file are subject to the Mozilla Public License Version 1.0 (the "License");
// you may not use this file except in compliance with the License. You may obtain a copy of the
// License at http://www.mozilla.org/MPL/
//
// Software distributed under the License is distributed on an "AS IS" basis,
// WITHOUT WARRANTY OF ANY KIND, either express or implied.
// See the License for the specific language governing rights and limitations under the License.
//
// The Original Code is: all this file
//
// The Initial Developer of the Original Code is Michael H. Kay.
//
// Contributor(s):
//