All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.commons.text.WordUtils Maven / Gradle / Ivy

There is a newer version: 1.12.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.text;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.Validate;

/**
 * 

Operations on Strings that contain words.

* *

This class tries to handle null input gracefully. * An exception will not be thrown for a null input. * Each method documents its behaviour in more detail.

* * @since 1.1 */ public class WordUtils { /** *

WordUtils instances should NOT be constructed in * standard programming. Instead, the class should be used as * WordUtils.wrap("foo bar", 20);.

* *

This constructor is public to permit tools that require a JavaBean * instance to operate.

*/ public WordUtils() { super(); } // Wrapping //-------------------------------------------------------------------------- /** *

Wraps a single line of text, identifying words by ' '.

* *

New lines will be separated by the system property line separator. * Very long words, such as URLs will not be wrapped.

* *

Leading spaces on a new line are stripped. * Trailing spaces are not stripped.

* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
inputwrapLengthresult
null*null
""*""
"Here is one line of text that is going to be wrapped after 20 columns."20"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."
"Click here to jump to the commons website - http://commons.apache.org"20"Click here to jump\nto the commons\nwebsite -\nhttp://commons.apache.org"
"Click here, http://commons.apache.org, to jump to the commons website"20"Click here,\nhttp://commons.apache.org,\nto jump to the\ncommons website"
* * (assuming that '\n' is the systems line separator) * * @param str the String to be word wrapped, may be null * @param wrapLength the column to wrap the words at, less than 1 is treated as 1 * @return a line with newlines inserted, null if null input */ public static String wrap(final String str, final int wrapLength) { return wrap(str, wrapLength, null, false); } /** *

Wraps a single line of text, identifying words by ' '.

* *

Leading spaces on a new line are stripped. * Trailing spaces are not stripped.

* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
inputwrapLengthnewLineStringwrapLongWordsresult
null**true/falsenull
""**true/false""
"Here is one line of text that is going to be wrapped after 20 columns."20"\n"true/false"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."
"Here is one line of text that is going to be wrapped after 20 columns."20"<br />"true/false"Here is one line of<br />text that is going< * br />to be wrapped after<br />20 columns."
"Here is one line of text that is going to be wrapped after 20 columns."20nulltrue/false"Here is one line of" + systemNewLine + "text that is going" * + systemNewLine + "to be wrapped after" + systemNewLine + "20 columns."
"Click here to jump to the commons website - http://commons.apache.org"20"\n"false"Click here to jump\nto the commons\nwebsite -\nhttp://commons.apache.org"
"Click here to jump to the commons website - http://commons.apache.org"20"\n"true"Click here to jump\nto the commons\nwebsite -\nhttp://commons.apach\ne.org"
* * @param str the String to be word wrapped, may be null * @param wrapLength the column to wrap the words at, less than 1 is treated as 1 * @param newLineStr the string to insert for a new line, * null uses the system property line separator * @param wrapLongWords true if long words (such as URLs) should be wrapped * @return a line with newlines inserted, null if null input */ public static String wrap(final String str, final int wrapLength, final String newLineStr, final boolean wrapLongWords) { return wrap(str, wrapLength, newLineStr, wrapLongWords, " "); } /** *

Wraps a single line of text, identifying words by wrapOn.

* *

Leading spaces on a new line are stripped. * Trailing spaces are not stripped.

* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
inputwrapLengthnewLineStringwrapLongWordswrapOnresult
null**true/false*null
""**true/false*""
"Here is one line of text that is going to be wrapped after 20 columns."20"\n"true/false" ""Here is one line of\ntext that is going\nto be wrapped after\n20 columns."
"Here is one line of text that is going to be wrapped after 20 columns."20"<br />"true/false" ""Here is one line of<br />text that is going<br /> * to be wrapped after<br />20 columns."
"Here is one line of text that is going to be wrapped after 20 columns."20nulltrue/false" ""Here is one line of" + systemNewLine + "text that is going" * + systemNewLine + "to be wrapped after" + systemNewLine + "20 columns."
"Click here to jump to the commons website - http://commons.apache.org"20"\n"false" ""Click here to jump\nto the commons\nwebsite -\nhttp://commons.apache.org"
"Click here to jump to the commons website - http://commons.apache.org"20"\n"true" ""Click here to jump\nto the commons\nwebsite -\nhttp://commons.apach\ne.org"
"flammable/inflammable"20"\n"true"/""flammable\ninflammable"
* @param str the String to be word wrapped, may be null * @param wrapLength the column to wrap the words at, less than 1 is treated as 1 * @param newLineStr the string to insert for a new line, * null uses the system property line separator * @param wrapLongWords true if long words (such as URLs) should be wrapped * @param wrapOn regex expression to be used as a breakable characters, * if blank string is provided a space character will be used * @return a line with newlines inserted, null if null input */ public static String wrap(final String str, int wrapLength, String newLineStr, final boolean wrapLongWords, String wrapOn) { if (str == null) { return null; } if (newLineStr == null) { newLineStr = System.lineSeparator(); } if (wrapLength < 1) { wrapLength = 1; } if (StringUtils.isBlank(wrapOn)) { wrapOn = " "; } final Pattern patternToWrapOn = Pattern.compile(wrapOn); final int inputLineLength = str.length(); int offset = 0; final StringBuilder wrappedLine = new StringBuilder(inputLineLength + 32); while (offset < inputLineLength) { int spaceToWrapAt = -1; Matcher matcher = patternToWrapOn.matcher(str.substring(offset, Math .min(offset + wrapLength + 1, inputLineLength))); if (matcher.find()) { if (matcher.start() == 0) { offset += matcher.end(); continue; } else { spaceToWrapAt = matcher.start() + offset; } } // only last line without leading spaces is left if (inputLineLength - offset <= wrapLength) { break; } while (matcher.find()) { spaceToWrapAt = matcher.start() + offset; } if (spaceToWrapAt >= offset) { // normal case wrappedLine.append(str.substring(offset, spaceToWrapAt)); wrappedLine.append(newLineStr); offset = spaceToWrapAt + 1; } else { // really long word or URL if (wrapLongWords) { // wrap really long word one line at a time wrappedLine.append(str.substring(offset, wrapLength + offset)); wrappedLine.append(newLineStr); offset += wrapLength; } else { // do not wrap really long word, just extend beyond limit matcher = patternToWrapOn.matcher(str.substring(offset + wrapLength)); if (matcher.find()) { spaceToWrapAt = matcher.start() + offset + wrapLength; } if (spaceToWrapAt >= 0) { wrappedLine.append(str.substring(offset, spaceToWrapAt)); wrappedLine.append(newLineStr); offset = spaceToWrapAt + 1; } else { wrappedLine.append(str.substring(offset)); offset = inputLineLength; } } } } // Whatever is left in line is short enough to just pass through wrappedLine.append(str.substring(offset)); return wrappedLine.toString(); } // Capitalizing //----------------------------------------------------------------------- /** *

Capitalizes all the whitespace separated words in a String. * Only the first character of each word is changed. To convert the * rest of each word to lowercase at the same time, * use {@link #capitalizeFully(String)}.

* *

Whitespace is defined by {@link Character#isWhitespace(char)}. * A null input String returns null. * Capitalization uses the Unicode title case, normally equivalent to * upper case.

* *
     * WordUtils.capitalize(null)        = null
     * WordUtils.capitalize("")          = ""
     * WordUtils.capitalize("i am FINE") = "I Am FINE"
     * 
* * @param str the String to capitalize, may be null * @return capitalized String, null if null String input * @see #uncapitalize(String) * @see #capitalizeFully(String) */ public static String capitalize(final String str) { return capitalize(str, null); } /** *

Capitalizes all the delimiter separated words in a String. * Only the first character of each word is changed. To convert the * rest of each word to lowercase at the same time, * use {@link #capitalizeFully(String, char[])}.

* *

The delimiters represent a set of characters understood to separate words. * The first string character and the first non-delimiter character after a * delimiter will be capitalized.

* *

A null input String returns null. * Capitalization uses the Unicode title case, normally equivalent to * upper case.

* *
     * WordUtils.capitalize(null, *)            = null
     * WordUtils.capitalize("", *)              = ""
     * WordUtils.capitalize(*, new char[0])     = *
     * WordUtils.capitalize("i am fine", null)  = "I Am Fine"
     * WordUtils.capitalize("i aM.fine", {'.'}) = "I aM.Fine"
     * 
* * @param str the String to capitalize, may be null * @param delimiters set of characters to determine capitalization, null means whitespace * @return capitalized String, null if null String input * @see #uncapitalize(String) * @see #capitalizeFully(String) */ public static String capitalize(final String str, final char... delimiters) { final int delimLen = delimiters == null ? -1 : delimiters.length; if (StringUtils.isEmpty(str) || delimLen == 0) { return str; } int strLen = str.length(); int [] newCodePoints = new int[strLen]; int outOffset = 0; boolean capitalizeNext = true; for (int index = 0; index < strLen;) { final int codePoint = str.codePointAt(index); if (isDelimiter(codePoint, delimiters)) { capitalizeNext = true; newCodePoints[outOffset++] = codePoint; index += Character.charCount(codePoint); } else if (capitalizeNext) { int titleCaseCodePoint = Character.toTitleCase(codePoint); newCodePoints[outOffset++] = titleCaseCodePoint; index += Character.charCount(titleCaseCodePoint); capitalizeNext = false; } else { newCodePoints[outOffset++] = codePoint; index += Character.charCount(codePoint); } } return new String(newCodePoints, 0, outOffset); } //----------------------------------------------------------------------- /** *

Converts all the whitespace separated words in a String into capitalized words, * that is each word is made up of a titlecase character and then a series of * lowercase characters.

* *

Whitespace is defined by {@link Character#isWhitespace(char)}. * A null input String returns null. * Capitalization uses the Unicode title case, normally equivalent to * upper case.

* *
     * WordUtils.capitalizeFully(null)        = null
     * WordUtils.capitalizeFully("")          = ""
     * WordUtils.capitalizeFully("i am FINE") = "I Am Fine"
     * 
* * @param str the String to capitalize, may be null * @return capitalized String, null if null String input */ public static String capitalizeFully(final String str) { return capitalizeFully(str, null); } /** *

Converts all the delimiter separated words in a String into capitalized words, * that is each word is made up of a titlecase character and then a series of * lowercase characters.

* *

The delimiters represent a set of characters understood to separate words. * The first string character and the first non-delimiter character after a * delimiter will be capitalized.

* *

A null input String returns null. * Capitalization uses the Unicode title case, normally equivalent to * upper case.

* *
     * WordUtils.capitalizeFully(null, *)            = null
     * WordUtils.capitalizeFully("", *)              = ""
     * WordUtils.capitalizeFully(*, null)            = *
     * WordUtils.capitalizeFully(*, new char[0])     = *
     * WordUtils.capitalizeFully("i aM.fine", {'.'}) = "I am.Fine"
     * 
* * @param str the String to capitalize, may be null * @param delimiters set of characters to determine capitalization, null means whitespace * @return capitalized String, null if null String input */ public static String capitalizeFully(String str, final char... delimiters) { final int delimLen = delimiters == null ? -1 : delimiters.length; if (StringUtils.isEmpty(str) || delimLen == 0) { return str; } str = str.toLowerCase(); return capitalize(str, delimiters); } //----------------------------------------------------------------------- /** *

Uncapitalizes all the whitespace separated words in a String. * Only the first character of each word is changed.

* *

Whitespace is defined by {@link Character#isWhitespace(char)}. * A null input String returns null.

* *
     * WordUtils.uncapitalize(null)        = null
     * WordUtils.uncapitalize("")          = ""
     * WordUtils.uncapitalize("I Am FINE") = "i am fINE"
     * 
* * @param str the String to uncapitalize, may be null * @return uncapitalized String, null if null String input * @see #capitalize(String) */ public static String uncapitalize(final String str) { return uncapitalize(str, null); } /** *

Uncapitalizes all the whitespace separated words in a String. * Only the first character of each word is changed.

* *

The delimiters represent a set of characters understood to separate words. * The first string character and the first non-delimiter character after a * delimiter will be uncapitalized.

* *

Whitespace is defined by {@link Character#isWhitespace(char)}. * A null input String returns null.

* *
     * WordUtils.uncapitalize(null, *)            = null
     * WordUtils.uncapitalize("", *)              = ""
     * WordUtils.uncapitalize(*, null)            = *
     * WordUtils.uncapitalize(*, new char[0])     = *
     * WordUtils.uncapitalize("I AM.FINE", {'.'}) = "i AM.fINE"
     * 
* * @param str the String to uncapitalize, may be null * @param delimiters set of characters to determine uncapitalization, null means whitespace * @return uncapitalized String, null if null String input * @see #capitalize(String) */ public static String uncapitalize(final String str, final char... delimiters) { final int delimLen = delimiters == null ? -1 : delimiters.length; if (StringUtils.isEmpty(str) || delimLen == 0) { return str; } int strLen = str.length(); int [] newCodePoints = new int[strLen]; int outOffset = 0; boolean uncapitalizeNext = true; for (int index = 0; index < strLen;) { final int codePoint = str.codePointAt(index); if (isDelimiter(codePoint, delimiters)) { uncapitalizeNext = true; newCodePoints[outOffset++] = codePoint; index += Character.charCount(codePoint); } else if (uncapitalizeNext) { int titleCaseCodePoint = Character.toLowerCase(codePoint); newCodePoints[outOffset++] = titleCaseCodePoint; index += Character.charCount(titleCaseCodePoint); uncapitalizeNext = false; } else { newCodePoints[outOffset++] = codePoint; index += Character.charCount(codePoint); } } return new String(newCodePoints, 0, outOffset); } //----------------------------------------------------------------------- /** *

Swaps the case of a String using a word based algorithm.

* *
    *
  • Upper case character converts to Lower case
  • *
  • Title case character converts to Lower case
  • *
  • Lower case character after Whitespace or at start converts to Title case
  • *
  • Other Lower case character converts to Upper case
  • *
* *

Whitespace is defined by {@link Character#isWhitespace(char)}. * A null input String returns null.

* *
     * StringUtils.swapCase(null)                 = null
     * StringUtils.swapCase("")                   = ""
     * StringUtils.swapCase("The dog has a BONE") = "tHE DOG HAS A bone"
     * 
* * @param str the String to swap case, may be null * @return the changed String, null if null String input */ public static String swapCase(final String str) { if (StringUtils.isEmpty(str)) { return str; } final int strLen = str.length(); int [] newCodePoints = new int[strLen]; int outOffset = 0; boolean whitespace = true; for (int index = 0; index < strLen;) { final int oldCodepoint = str.codePointAt(index); final int newCodePoint; if (Character.isUpperCase(oldCodepoint)) { newCodePoint = Character.toLowerCase(oldCodepoint); whitespace = false; } else if (Character.isTitleCase(oldCodepoint)) { newCodePoint = Character.toLowerCase(oldCodepoint); whitespace = false; } else if (Character.isLowerCase(oldCodepoint)) { if (whitespace) { newCodePoint = Character.toTitleCase(oldCodepoint); whitespace = false; } else { newCodePoint = Character.toUpperCase(oldCodepoint); } } else { whitespace = Character.isWhitespace(oldCodepoint); newCodePoint = oldCodepoint; } newCodePoints[outOffset++] = newCodePoint; index += Character.charCount(newCodePoint); } return new String(newCodePoints, 0, outOffset); } //----------------------------------------------------------------------- /** *

Extracts the initial characters from each word in the String.

* *

All first characters after whitespace are returned as a new string. * Their case is not changed.

* *

Whitespace is defined by {@link Character#isWhitespace(char)}. * A null input String returns null.

* *
     * WordUtils.initials(null)             = null
     * WordUtils.initials("")               = ""
     * WordUtils.initials("Ben John Lee")   = "BJL"
     * WordUtils.initials("Ben J.Lee")      = "BJ"
     * 
* * @param str the String to get initials from, may be null * @return String of initial letters, null if null String input * @see #initials(String,char[]) */ public static String initials(final String str) { return initials(str, null); } /** *

Extracts the initial characters from each word in the String.

* *

All first characters after the defined delimiters are returned as a new string. * Their case is not changed.

* *

If the delimiters array is null, then Whitespace is used. * Whitespace is defined by {@link Character#isWhitespace(char)}. * A null input String returns null. * An empty delimiter array returns an empty String.

* *
     * WordUtils.initials(null, *)                = null
     * WordUtils.initials("", *)                  = ""
     * WordUtils.initials("Ben John Lee", null)   = "BJL"
     * WordUtils.initials("Ben J.Lee", null)      = "BJ"
     * WordUtils.initials("Ben J.Lee", [' ','.']) = "BJL"
     * WordUtils.initials(*, new char[0])         = ""
     * 
* * @param str the String to get initials from, may be null * @param delimiters set of characters to determine words, null means whitespace * @return String of initial characters, null if null String input * @see #initials(String) */ public static String initials(final String str, final char... delimiters) { if (StringUtils.isEmpty(str)) { return str; } if (delimiters != null && delimiters.length == 0) { return ""; } final int strLen = str.length(); final char[] buf = new char[strLen / 2 + 1]; int count = 0; boolean lastWasGap = true; for (int i = 0; i < strLen; i++) { final char ch = str.charAt(i); if (isDelimiter(ch, delimiters)) { lastWasGap = true; } else if (lastWasGap) { buf[count++] = ch; lastWasGap = false; } else { continue; // ignore ch } } return new String(buf, 0, count); } //----------------------------------------------------------------------- /** *

Checks if the String contains all words in the given array.

* *

* A {@code null} String will return {@code false}. A {@code null}, zero * length search array or if one element of array is null will return {@code false}. *

* *
     * WordUtils.containsAllWords(null, *)            = false
     * WordUtils.containsAllWords("", *)              = false
     * WordUtils.containsAllWords(*, null)            = false
     * WordUtils.containsAllWords(*, [])              = false
     * WordUtils.containsAllWords("abcd", "ab", "cd") = false
     * WordUtils.containsAllWords("abc def", "def", "abc") = true
     * 
* * @param word The CharSequence to check, may be null * @param words The array of String words to search for, may be null * @return {@code true} if all search words are found, {@code false} otherwise */ public static boolean containsAllWords(final CharSequence word, final CharSequence... words) { if (StringUtils.isEmpty(word) || ArrayUtils.isEmpty(words)) { return false; } for (final CharSequence w : words) { if (StringUtils.isBlank(w)) { return false; } final Pattern p = Pattern.compile(".*\\b" + w + "\\b.*"); if (!p.matcher(word).matches()) { return false; } } return true; } //----------------------------------------------------------------------- /** * Is the character a delimiter. * * @param ch the character to check * @param delimiters the delimiters * @return true if it is a delimiter */ public static boolean isDelimiter(final char ch, final char[] delimiters) { if (delimiters == null) { return Character.isWhitespace(ch); } for (final char delimiter : delimiters) { if (ch == delimiter) { return true; } } return false; } //----------------------------------------------------------------------- /** * Is the codePoint a delimiter. * * @param codePoint the codePint to check * @param delimiters the delimiters * @return true if it is a delimiter */ public static boolean isDelimiter(final int codePoint, final char[] delimiters) { if (delimiters == null) { return Character.isWhitespace(codePoint); } for (int index = 0; index < delimiters.length; index++) { int delimiterCodePoint = Character.codePointAt(delimiters, index); if (delimiterCodePoint == codePoint) { return true; } } return false; } //----------------------------------------------------------------------- /** * Abbreviates the words nicely. * * This method searches for the first space after the lower limit and abbreviates * the String there. It will also append any String passed as a parameter * to the end of the String. The upper limit can be specified to forcibly * abbreviate a String. * * @param str the string to be abbreviated. If null is passed, null is returned. * If the empty String is passed, the empty string is returned. * @param lower the lower limit. * @param upper the upper limit; specify -1 if no limit is desired. * If the upper limit is lower than the lower limit, it will be * adjusted to be the same as the lower limit. * @param appendToEnd String to be appended to the end of the abbreviated string. * This is appended ONLY if the string was indeed abbreviated. * The append does not count towards the lower or upper limits. * @return the abbreviated String. * *
     * WordUtils.abbreviate("Now is the time for all good men", 0, 40, null));     = "Now"
     * WordUtils.abbreviate("Now is the time for all good men", 10, 40, null));    = "Now is the"
     * WordUtils.abbreviate("Now is the time for all good men", 20, 40, null));    = "Now is the time for all"
     * WordUtils.abbreviate("Now is the time for all good men", 0, 40, ""));       = "Now"
     * WordUtils.abbreviate("Now is the time for all good men", 10, 40, ""));      = "Now is the"
     * WordUtils.abbreviate("Now is the time for all good men", 20, 40, ""));      = "Now is the time for all"
     * WordUtils.abbreviate("Now is the time for all good men", 0, 40, " ..."));   = "Now ..."
     * WordUtils.abbreviate("Now is the time for all good men", 10, 40, " ..."));  = "Now is the ..."
     * WordUtils.abbreviate("Now is the time for all good men", 20, 40, " ..."));  = "Now is the time for all ..."
     * WordUtils.abbreviate("Now is the time for all good men", 0, -1, ""));       = "Now"
     * WordUtils.abbreviate("Now is the time for all good men", 10, -1, ""));      = "Now is the"
     * WordUtils.abbreviate("Now is the time for all good men", 20, -1, ""));      = "Now is the time for all"
     * WordUtils.abbreviate("Now is the time for all good men", 50, -1, ""));      = "Now is the time for all good men"
     * WordUtils.abbreviate("Now is the time for all good men", 1000, -1, ""));    = "Now is the time for all good men"
     * WordUtils.abbreviate("Now is the time for all good men", 9, -10, null));    = IllegalArgumentException
     * WordUtils.abbreviate("Now is the time for all good men", 10, 5, null));     = IllegalArgumentException
     * 
*/ public static String abbreviate(String str, int lower, int upper, String appendToEnd) { Validate.isTrue(upper >= -1, "upper value cannot be less than -1"); Validate.isTrue(upper >= lower || upper == -1, "upper value is less than lower value"); if (StringUtils.isEmpty(str)) { return str; } // if the lower value is greater than the length of the string, // set to the length of the string if (lower > str.length()) { lower = str.length(); } // if the upper value is -1 (i.e. no limit) or is greater // than the length of the string, set to the length of the string if (upper == -1 || upper > str.length()) { upper = str.length(); } final StringBuilder result = new StringBuilder(); final int index = StringUtils.indexOf(str, " ", lower); if (index == -1) { result.append(str.substring(0, upper)); // only if abbreviation has occured do we append the appendToEnd value if (upper != str.length()) { result.append(StringUtils.defaultString(appendToEnd)); } } else if (index > upper) { result.append(str.substring(0, upper)); result.append(StringUtils.defaultString(appendToEnd)); } else { result.append(str.substring(0, index)); result.append(StringUtils.defaultString(appendToEnd)); } return result.toString(); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy