// net.sf.saxon.value.Whitespace (Maven / Gradle / Ivy artifact: saxon-he)
// Go to download
// Show more of this group; show more artifacts with this name
// Show all versions of saxon-he; show documentation
// An OSGi bundle for Saxon-HE
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2013 Saxonica Limited.
// This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0.
// If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
// This Source Code Form is "Incompatible With Secondary Licenses", as defined by the Mozilla Public License, v. 2.0.
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
package net.sf.saxon.value;
import net.sf.saxon.tree.tiny.CompressedWhitespace;
import net.sf.saxon.tree.util.FastStringBuffer;
/**
* This class provides helper methods and constants for handling whitespace
*/
public class Whitespace {
private Whitespace() {}
/**
* The values PRESERVE, REPLACE, and COLLAPSE represent the three options for whitespace
* normalization. They are deliberately chosen in ascending strength order; given a number
* of whitespace facets, only the strongest needs to be carried out. The option TRIM is
* used instead of COLLAPSE when all valid values have no interior whitespace; trimming
* leading and trailing whitespace is then equivalent to the action of COLLAPSE, but faster.
*/
public static final int PRESERVE = 0;
public static final int REPLACE = 1;
public static final int COLLAPSE = 2;
public static final int TRIM = 3;
/**
* The values NONE, IGNORABLE, and ALL identify which kinds of whitespace text node
* should be stripped when building a source tree. UNSPECIFIED indicates that no
* particular request has been made. XSLT indicates that whitespace should be stripped
* as defined by the xsl:strip-space and xsl:preserve-space declarations in the stylesheet
*/
public static final int NONE = 0;
public static final int IGNORABLE = 1;
public static final int ALL = 2;
public static final int UNSPECIFIED = 3;
public static final int XSLT = 4;
/**
* Test whether a character is whitespace
* @param ch the character (Unicode codepoint) to be tested
* @return true if the character is one of tab, newline, carriage return, or space
*/
public static boolean isWhitespace(int ch) {
switch (ch) {
case 9:
case 10:
case 13:
case 32:
return true;
default:
return false;
}
}
/**
* Apply schema-defined whitespace normalization to a string
* @param action the action to be applied: one of PRESERVE, REPLACE, or COLLAPSE
* @param value the value to be normalized
* @return the value after normalization
*/
public static CharSequence applyWhitespaceNormalization(int action, /*@NotNull*/ CharSequence value) {
switch (action) {
case PRESERVE:
return value;
case REPLACE:
FastStringBuffer sb = new FastStringBuffer(value.length());
for (int i=0; i 32 || !C0WHITE[c]) {
sb.append(c);
}
}
return sb;
} else {
return value;
}
}
/**
* Remove leading whitespace characters from a string
* @param value the string whose leading whitespace is to be removed
* @return the string with leading whitespace removed. This may be the
* original string if there was no leading whitespace
*/
public static CharSequence removeLeadingWhitespace(/*@NotNull*/ CharSequence value) {
int start = -1;
final int len = value.length();
for (int i=0; i 32 || !C0WHITE[c]) {
start = i;
break;
}
}
if (start == 0) {
return value;
} else if (start < 0 || start == len - 1) {
return "";
} else {
return value.subSequence(start, len);
}
}
/**
* Determine if a string contains any whitespace
* @param value the string to be tested
* @return true if the string contains a character that is XML whitespace, that is
* tab, newline, carriage return, or space
*/
public static boolean containsWhitespace(/*@NotNull*/ CharSequence value) {
final int len = value.length();
for (int i=0; i 32 || !C0WHITE[c]) {
return false;
}
}
return true;
}
/*@NotNull*/ private static boolean[] C0WHITE = {
false, false, false, false, false, false, false, false, // 0-7
false, true, true, false, false, true, false, false, // 8-15
false, false, false, false, false, false, false, false, // 16-23
false, false, false, false, false, false, false, false, // 24-31
true // 32
};
/**
* Determine if a character is whitespace
* @param c the character to be tested
* @return true if the character is a whitespace character
*/
public final static boolean isWhite(char c) {
return c<=32 && C0WHITE[c];
}
/**
* Normalize whitespace as defined in XML Schema. Note that this is not the same
* as the XPath normalize-space() function, which is supported by the
* {@link #collapseWhitespace} method
* @param in the string to be normalized
* @return a copy of the string in which any whitespace character is replaced by
* a single space character
*/
/*@NotNull*/ public static CharSequence normalizeWhitespace(/*@NotNull*/ CharSequence in) {
FastStringBuffer sb = new FastStringBuffer(in.length());
for (int i=0; i0 && sb.charAt(nlen-1)==' ') {
sb.setLength(nlen-1);
}
return sb;
}
/**
* Remove leading and trailing whitespace. This has the same effect as collapseWhitespace,
* but is cheaper, for use by data types that do not allow internal whitespace.
* @param in the input string whose whitespace is to be removed
* @return the result of removing excess whitespace
*/
public static CharSequence trimWhitespace(/*@NotNull*/ CharSequence in) {
if (in.length()==0) {
return in;
}
int first = 0;
int last = in.length()-1;
while (true) {
final char x = in.charAt(first);
if (x > 32 || !C0WHITE[x]) {
break;
}
if (first++ >= last) {
return "";
}
}
while (true) {
final char x = in.charAt(last);
if (x > 32 || !C0WHITE[x]) {
break;
}
last--;
}
if (first == 0 && last == in.length()-1) {
return in;
} else {
return in.subSequence(first, last+1);
}
}
/**
* Trim leading and trailing whitespace from a string, returning a string.
* This differs from the Java trim() method in that the only characters treated as
* whitespace are space, \n, \r, and \t. The String#trim() method removes all C0
* control characters (which is not the same thing under XML 1.1).
* @param s the string to be trimmed. If null is supplied, null is returned.
* @return the string with leading and trailing whitespace removed.
* Returns null if and only if the supplied argument is null.
*/
/*@Nullable*/ public static String trim(/*@Nullable*/ CharSequence s) {
if (s == null) {
return null;
}
return trimWhitespace(s).toString();
}
}