// net.sf.saxon.value.Whitespace Maven / Gradle / Ivy
package net.sf.saxon.value;
import net.sf.saxon.om.FastStringBuffer;
import net.sf.saxon.tinytree.CompressedWhitespace;
/**
 * This class provides helper methods and constants for handling whitespace.
 * Throughout this class "whitespace" means exactly the four XML whitespace
 * characters: tab (x09), newline (x0A), carriage return (x0D) and space (x20).
 * The class only holds static members and cannot be instantiated.
 */
public class Whitespace {

    private Whitespace() {}

    /**
     * The values PRESERVE, REPLACE, and COLLAPSE represent the three options for whitespace
     * normalization. They are deliberately chosen in ascending strength order; given a number
     * of whitespace facets, only the strongest needs to be carried out.
     */
    public static final int PRESERVE = 0;
    public static final int REPLACE = 1;
    public static final int COLLAPSE = 2;

    /**
     * The values NONE, IGNORABLE, and ALL identify which kinds of whitespace text node
     * should be stripped when building a source tree. UNSPECIFIED indicates that no
     * particular request has been made. XSLT indicates that whitespace should be stripped
     * as defined by the xsl:strip-space and xsl:preserve-space declarations in the stylesheet
     */
    public static final int NONE = 0;
    public static final int IGNORABLE = 1;
    public static final int ALL = 2;
    public static final int UNSPECIFIED = 3;
    public static final int XSLT = 4;

    /**
     * Lookup table for the characters 0 to 32: an entry is true if the character
     * with that code is whitespace (tab, newline, carriage return, space).
     */
    private static final boolean[] C0WHITE = {
        false, false, false, false, false, false, false, false,  // 0-7
        false, true,  true,  false, false, true,  false, false,  // 8-15
        false, false, false, false, false, false, false, false,  // 16-23
        false, false, false, false, false, false, false, false,  // 24-31
        true                                                     // 32
    };

    /**
     * Test whether a single char is whitespace, using the lookup table.
     * The range check comes first so the table is never indexed beyond entry 32.
     */
    private static boolean isWhiteChar(char c) {
        return c <= 32 && C0WHITE[c];
    }

    /**
     * Test whether a character is whitespace
     * @param ch the character (Unicode codepoint) to be tested
     * @return true if the character is one of tab, newline, carriage return, or space
     */
    public static boolean isWhitespace(int ch) {
        switch (ch) {
            case 9:
            case 10:
            case 13:
            case 32:
                return true;
            default:
                return false;
        }
    }

    /**
     * Apply schema-defined whitespace normalization to a string
     * @param action the action to be applied: one of PRESERVE, REPLACE, or COLLAPSE
     * @param value the value to be normalized
     * @return the value after normalization. For PRESERVE this is the supplied value itself.
     * @throws IllegalArgumentException if the action is not one of the three recognized values
     */
    public static CharSequence applyWhitespaceNormalization(int action, CharSequence value) {
        switch (action) {
            case PRESERVE:
                return value;
            case REPLACE:
                return normalizeWhitespace(value);
            case COLLAPSE:
                return collapseWhitespace(value);
            default:
                throw new IllegalArgumentException("Unknown whitespace normalization action: " + action);
        }
    }

    /**
     * Remove leading whitespace characters from a string
     * @param value the string whose leading whitespace is to be removed
     * @return the string with leading whitespace removed. This may be the
     * original string if there was no leading whitespace
     */
    public static CharSequence removeLeadingWhitespace(CharSequence value) {
        final int len = value.length();
        // Find the first character that is not whitespace
        int start = 0;
        while (start < len && isWhiteChar(value.charAt(start))) {
            start++;
        }
        if (start == 0) {
            // Nothing to remove (this case also covers the empty string)
            return value;
        }
        if (start == len) {
            // The string consists entirely of whitespace
            return "";
        }
        // Note that start == len-1 is an ordinary case: the final character is
        // significant and must be retained.
        return value.subSequence(start, len);
    }

    /**
     * Determine if a string contains any whitespace
     * @param value the string to be tested
     * @return true if the string contains a character that is XML whitespace, that is
     * tab, newline, carriage return, or space
     */
    public static boolean containsWhitespace(CharSequence value) {
        final int len = value.length();
        for (int i = 0; i < len; i++) {
            if (isWhiteChar(value.charAt(i))) {
                return true;
            }
        }
        return false;
    }

    /**
     * Normalize whitespace as defined in XML Schema. Note that this is not the same
     * as the XPath normalize-space() function, which is supported by the
     * {@link #collapseWhitespace} method
     * @param in the string to be normalized
     * @return a copy of the string in which any whitespace character is replaced by
     * a single space character. The result has the same length as the input.
     */
    public static CharSequence normalizeWhitespace(CharSequence in) {
        final int len = in.length();
        final char[] buffer = new char[len];
        for (int i = 0; i < len; i++) {
            final char c = in.charAt(i);
            buffer[i] = isWhiteChar(c) ? ' ' : c;
        }
        return new String(buffer);
    }

    /**
     * Collapse whitespace: leading and trailing whitespace is removed, and every
     * internal run of one or more whitespace characters is replaced by a single space.
     * @param in the string to be collapsed
     * @return the collapsed string. This is the original string if it is empty or
     * contains no whitespace at all.
     */
    public static CharSequence collapseWhitespace(CharSequence in) {
        final int len = in.length();
        if (len == 0 || !containsWhitespace(in)) {
            return in;
        }
        // The result can never be longer than the input
        final char[] buffer = new char[len];
        int used = 0;
        boolean separatorPending = false;
        for (int i = 0; i < len; i++) {
            final char c = in.charAt(i);
            if (isWhiteChar(c)) {
                // A separator is only owed once some content has been written, which
                // discards leading whitespace; a separator still pending at the end of
                // the input is never written, which discards trailing whitespace.
                separatorPending = used > 0;
            } else {
                if (separatorPending) {
                    buffer[used++] = ' ';
                    separatorPending = false;
                }
                buffer[used++] = c;
            }
        }
        return new String(buffer, 0, used);
    }

    /**
     * Remove leading and trailing whitespace. This has the same effect as collapseWhitespace,
     * but is cheaper, for use by data types that do not allow internal whitespace.
     * @param in the input string whose whitespace is to be removed
     * @return the result of removing excess whitespace. This is the original string if
     * there was nothing to remove.
     */
    public static CharSequence trimWhitespace(CharSequence in) {
        final int len = in.length();
        if (len == 0) {
            return in;
        }
        int first = 0;
        while (first < len && isWhiteChar(in.charAt(first))) {
            first++;
        }
        if (first == len) {
            // Nothing but whitespace
            return "";
        }
        // A non-whitespace character exists at position first, so this scan terminates
        int last = len - 1;
        while (isWhiteChar(in.charAt(last))) {
            last--;
        }
        if (first == 0 && last == len - 1) {
            return in;
        }
        return in.subSequence(first, last + 1);
    }

    /**
     * Trim leading and trailing whitespace from a string, returning a string.
     * This differs from the Java trim() method in that the only characters treated as
     * whitespace are space, \n, \r, and \t. The String#trim() method removes all C0
     * control characters (which is not the same thing under XML 1.1).
     * @param s the string to be trimmed. If null is supplied, null is returned.
     * @return the string with leading and trailing whitespace removed.
     */
    public static String trim(CharSequence s) {
        if (s == null) {
            return null;
        }
        return trimWhitespace(s).toString();
    }
}
//
// The contents of this file are subject to the Mozilla Public License Version 1.0 (the "License");
// you may not use this file except in compliance with the License. You may obtain a copy of the
// License at http://www.mozilla.org/MPL/
//
// Software distributed under the License is distributed on an "AS IS" basis,
// WITHOUT WARRANTY OF ANY KIND, either express or implied.
// See the License for the specific language governing rights and limitations under the License.
//
// The Original Code is: all this file
//
// The Initial Developer of the Original Code is Michael H. Kay.
//
// Contributor(s):
//
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy