org.apache.commons.lang3.text.WordUtils Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of commons-lang3 Show documentation
Apache Commons Lang, a package of Java utility classes for the classes that are in java.lang's hierarchy, or are considered to be so standard as to justify existence in java.lang.
There is a newer version: 3.17.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.lang3.text;

import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.SystemUtils;

/**
 * Operations on Strings that contain words.
 * 
 * This class tries to handle null input gracefully.
 * An exception will not be thrown for a null input.
 * Each method documents its behaviour in more detail.
 * 
 * @since 2.0
 * @version $Id: WordUtils.java 1148520 2011-07-19 20:53:23Z ggregory $
 */
public class WordUtils {

    /**
     * WordUtils instances should NOT be constructed in
     * standard programming. Instead, the class should be used as
     * WordUtils.wrap("foo bar", 20);.
     *
     * This constructor is public to permit tools that require a JavaBean
     * instance to operate.
     */
    public WordUtils() {
      super();
    }

    // Wrapping
    //--------------------------------------------------------------------------
    /**
     * Wraps a single line of text, identifying words by ' '.
     * 
     * New lines will be separated by the system property line separator.
     * Very long words, such as URLs will not be wrapped.
     * 
     * Leading spaces on a new line are stripped.
     * Trailing spaces are not stripped.
     *
     *      * WordUtils.wrap(null, *) = null
     * WordUtils.wrap("", *) = ""
     * 
     *
     * @param str  the String to be word wrapped, may be null
     * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1
     * @return a line with newlines inserted, null if null input
     */
    public static String wrap(String str, int wrapLength) {
        return wrap(str, wrapLength, null, false);
    }
    
    /**
     * Wraps a single line of text, identifying words by ' '.
     * 
     * Leading spaces on a new line are stripped.
     * Trailing spaces are not stripped.
     * 
     *      * WordUtils.wrap(null, *, *, *) = null
     * WordUtils.wrap("", *, *, *) = ""
     * 
     *
     * @param str  the String to be word wrapped, may be null
     * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1
     * @param newLineStr  the string to insert for a new line, 
     *  null uses the system property line separator
     * @param wrapLongWords  true if long words (such as URLs) should be wrapped
     * @return a line with newlines inserted, null if null input
     */
    public static String wrap(String str, int wrapLength, String newLineStr, boolean wrapLongWords) {
        if (str == null) {
            return null;
        }
        if (newLineStr == null) {
            newLineStr = SystemUtils.LINE_SEPARATOR;
        }
        if (wrapLength < 1) {
            wrapLength = 1;
        }
        int inputLineLength = str.length();
        int offset = 0;
        StringBuilder wrappedLine = new StringBuilder(inputLineLength + 32);
        
        while ((inputLineLength - offset) > wrapLength) {
            if (str.charAt(offset) == ' ') {
                offset++;
                continue;
            }
            int spaceToWrapAt = str.lastIndexOf(' ', wrapLength + offset);

            if (spaceToWrapAt >= offset) {
                // normal case
                wrappedLine.append(str.substring(offset, spaceToWrapAt));
                wrappedLine.append(newLineStr);
                offset = spaceToWrapAt + 1;
                
            } else {
                // really long word or URL
                if (wrapLongWords) {
                    // wrap really long word one line at a time
                    wrappedLine.append(str.substring(offset, wrapLength + offset));
                    wrappedLine.append(newLineStr);
                    offset += wrapLength;
                } else {
                    // do not wrap really long word, just extend beyond limit
                    spaceToWrapAt = str.indexOf(' ', wrapLength + offset);
                    if (spaceToWrapAt >= 0) {
                        wrappedLine.append(str.substring(offset, spaceToWrapAt));
                        wrappedLine.append(newLineStr);
                        offset = spaceToWrapAt + 1;
                    } else {
                        wrappedLine.append(str.substring(offset));
                        offset = inputLineLength;
                    }
                }
            }
        }

        // Whatever is left in line is short enough to just pass through
        wrappedLine.append(str.substring(offset));

        return wrappedLine.toString();
    }

    // Capitalizing
    //-----------------------------------------------------------------------
    /**
     * Capitalizes all the whitespace separated words in a String.
     * Only the first letter of each word is changed. To convert the 
     * rest of each word to lowercase at the same time, 
     * use {@link #capitalizeFully(String)}.
     *
     * Whitespace is defined by {@link Character#isWhitespace(char)}.
     * A null input String returns null.
     * Capitalization uses the Unicode title case, normally equivalent to
     * upper case.
     *
     *      * WordUtils.capitalize(null)        = null
     * WordUtils.capitalize("")          = ""
     * WordUtils.capitalize("i am FINE") = "I Am FINE"
     * 
     * 
     * @param str  the String to capitalize, may be null
     * @return capitalized String, null if null String input
     * @see #uncapitalize(String)
     * @see #capitalizeFully(String)
     */
    public static String capitalize(String str) {
        return capitalize(str, null);
    }

    /**
     * Capitalizes all the delimiter separated words in a String.
     * Only the first letter of each word is changed. To convert the 
     * rest of each word to lowercase at the same time, 
     * use {@link #capitalizeFully(String, char[])}.
     *
     * The delimiters represent a set of characters understood to separate words.
     * The first string character and the first non-delimiter character after a
     * delimiter will be capitalized. 
     *
     * A null input String returns null.
     * Capitalization uses the Unicode title case, normally equivalent to
     * upper case.
     *
     *      * WordUtils.capitalize(null, *)            = null
     * WordUtils.capitalize("", *)              = ""
     * WordUtils.capitalize(*, new char[0])     = *
     * WordUtils.capitalize("i am fine", null)  = "I Am Fine"
     * WordUtils.capitalize("i aM.fine", {'.'}) = "I aM.Fine"
     * 
     * 
     * @param str  the String to capitalize, may be null
     * @param delimiters  set of characters to determine capitalization, null means whitespace
     * @return capitalized String, null if null String input
     * @see #uncapitalize(String)
     * @see #capitalizeFully(String)
     * @since 2.1
     */
    public static String capitalize(String str, char... delimiters) {
        int delimLen = delimiters == null ? -1 : delimiters.length;
        if (StringUtils.isEmpty(str) || delimLen == 0) {
            return str;
        }
        char[] buffer = str.toCharArray();
        boolean capitalizeNext = true;
        for (int i = 0; i < buffer.length; i++) {
            char ch = buffer[i];
            if (isDelimiter(ch, delimiters)) {
                capitalizeNext = true;
            } else if (capitalizeNext) {
                buffer[i] = Character.toTitleCase(ch);
                capitalizeNext = false;
            }
        }
        return new String(buffer);
    }

    //-----------------------------------------------------------------------
    /**
     * Converts all the whitespace separated words in a String into capitalized words, 
     * that is each word is made up of a titlecase character and then a series of 
     * lowercase characters.  
     *
     * Whitespace is defined by {@link Character#isWhitespace(char)}.
     * A null input String returns null.
     * Capitalization uses the Unicode title case, normally equivalent to
     * upper case.
     *
     *      * WordUtils.capitalizeFully(null)        = null
     * WordUtils.capitalizeFully("")          = ""
     * WordUtils.capitalizeFully("i am FINE") = "I Am Fine"
     * 
     * 
     * @param str  the String to capitalize, may be null
     * @return capitalized String, null if null String input
     */
    public static String capitalizeFully(String str) {
        return capitalizeFully(str, null);
    }

    /**
     * Converts all the delimiter separated words in a String into capitalized words, 
     * that is each word is made up of a titlecase character and then a series of 
     * lowercase characters. 
     *
     * The delimiters represent a set of characters understood to separate words.
     * The first string character and the first non-delimiter character after a
     * delimiter will be capitalized. 
     *
     * A null input String returns null.
     * Capitalization uses the Unicode title case, normally equivalent to
     * upper case.
     *
     *      * WordUtils.capitalizeFully(null, *)            = null
     * WordUtils.capitalizeFully("", *)              = ""
     * WordUtils.capitalizeFully(*, null)            = *
     * WordUtils.capitalizeFully(*, new char[0])     = *
     * WordUtils.capitalizeFully("i aM.fine", {'.'}) = "I am.Fine"
     * 
     * 
     * @param str  the String to capitalize, may be null
     * @param delimiters  set of characters to determine capitalization, null means whitespace
     * @return capitalized String, null if null String input
     * @since 2.1
     */
    public static String capitalizeFully(String str, char... delimiters) {
        int delimLen = (delimiters == null ? -1 : delimiters.length);
        if (StringUtils.isEmpty(str) || delimLen == 0) {
            return str;
        }
        str = str.toLowerCase();
        return capitalize(str, delimiters);
    }

    //-----------------------------------------------------------------------
    /**
     * Uncapitalizes all the whitespace separated words in a String.
     * Only the first letter of each word is changed.
     *
     * Whitespace is defined by {@link Character#isWhitespace(char)}.
     * A null input String returns null.
     *
     *      * WordUtils.uncapitalize(null)        = null
     * WordUtils.uncapitalize("")          = ""
     * WordUtils.uncapitalize("I Am FINE") = "i am fINE"
     * 
     * 
     * @param str  the String to uncapitalize, may be null
     * @return uncapitalized String, null if null String input
     * @see #capitalize(String)
     */
    public static String uncapitalize(String str) {
        return uncapitalize(str, null);
    }

    /**
     * Uncapitalizes all the whitespace separated words in a String.
     * Only the first letter of each word is changed.
     *
     * The delimiters represent a set of characters understood to separate words.
     * The first string character and the first non-delimiter character after a
     * delimiter will be uncapitalized. 
     *
     * Whitespace is defined by {@link Character#isWhitespace(char)}.
     * A null input String returns null.
     *
     *      * WordUtils.uncapitalize(null, *)            = null
     * WordUtils.uncapitalize("", *)              = ""
     * WordUtils.uncapitalize(*, null)            = *
     * WordUtils.uncapitalize(*, new char[0])     = *
     * WordUtils.uncapitalize("I AM.FINE", {'.'}) = "i AM.fINE"
     * 
     * 
     * @param str  the String to uncapitalize, may be null
     * @param delimiters  set of characters to determine uncapitalization, null means whitespace
     * @return uncapitalized String, null if null String input
     * @see #capitalize(String)
     * @since 2.1
     */
    public static String uncapitalize(String str, char... delimiters) {
        int delimLen = (delimiters == null ? -1 : delimiters.length);
        if (StringUtils.isEmpty(str) || delimLen == 0) {
            return str;
        }
        char[] buffer = str.toCharArray();
        boolean uncapitalizeNext = true;
        for (int i = 0; i < buffer.length; i++) {
            char ch = buffer[i];
            if (isDelimiter(ch, delimiters)) {
                uncapitalizeNext = true;
            } else if (uncapitalizeNext) {
                buffer[i] = Character.toLowerCase(ch);
                uncapitalizeNext = false;
            }
        }
        return new String(buffer);
    }

    //-----------------------------------------------------------------------
    /**
     * Swaps the case of a String using a word based algorithm.
     * 
     * 
     *  Upper case character converts to Lower case
     *  Title case character converts to Lower case
     *  Lower case character after Whitespace or at start converts to Title case
     *  Other Lower case character converts to Upper case
     * 
     * 
     * Whitespace is defined by {@link Character#isWhitespace(char)}.
     * A null input String returns null.
     * 
     *      * StringUtils.swapCase(null)                 = null
     * StringUtils.swapCase("")                   = ""
     * StringUtils.swapCase("The dog has a BONE") = "tHE DOG HAS A bone"
     * 
     * 
     * @param str  the String to swap case, may be null
     * @return the changed String, null if null String input
     */
    public static String swapCase(String str) {
        if (StringUtils.isEmpty(str)) {
            return str;
        }
        char[] buffer = str.toCharArray();

        boolean whitespace = true;

        for (int i = 0; i < buffer.length; i++) {
            char ch = buffer[i];
            if (Character.isUpperCase(ch)) {
                buffer[i] = Character.toLowerCase(ch);
                whitespace = false;
            } else if (Character.isTitleCase(ch)) {
                buffer[i] = Character.toLowerCase(ch);
                whitespace = false;
            } else if (Character.isLowerCase(ch)) {
                if (whitespace) {
                    buffer[i] = Character.toTitleCase(ch);
                    whitespace = false;
                } else {
                    buffer[i] = Character.toUpperCase(ch);
                }
            } else {
                whitespace = Character.isWhitespace(ch);
            }
        }
        return new String(buffer);
    }

    //-----------------------------------------------------------------------
    /**
     * Extracts the initial letters from each word in the String.
     * 
     * The first letter of the string and all first letters after
     * whitespace are returned as a new string.
     * Their case is not changed.
     *
     * Whitespace is defined by {@link Character#isWhitespace(char)}.
     * A null input String returns null.
     *
     *      * WordUtils.initials(null)             = null
     * WordUtils.initials("")               = ""
     * WordUtils.initials("Ben John Lee")   = "BJL"
     * WordUtils.initials("Ben J.Lee")      = "BJ"
     * 
     *
     * @param str  the String to get initials from, may be null
     * @return String of initial letters, null if null String input
     * @see #initials(String,char[])
     * @since 2.2
     */
    public static String initials(String str) {
        return initials(str, null);
    }

    /**
     * Extracts the initial letters from each word in the String.
     * 
     * The first letter of the string and all first letters after the
     * defined delimiters are returned as a new string.
     * Their case is not changed.
     *
     * If the delimiters array is null, then Whitespace is used.
     * Whitespace is defined by {@link Character#isWhitespace(char)}.
     * A null input String returns null.
     * An empty delimiter array returns an empty String.
     *
     *      * WordUtils.initials(null, *)                = null
     * WordUtils.initials("", *)                  = ""
     * WordUtils.initials("Ben John Lee", null)   = "BJL"
     * WordUtils.initials("Ben J.Lee", null)      = "BJ"
     * WordUtils.initials("Ben J.Lee", [' ','.']) = "BJL"
     * WordUtils.initials(*, new char[0])         = ""
     * 
     * 
     * @param str  the String to get initials from, may be null
     * @param delimiters  set of characters to determine words, null means whitespace
     * @return String of initial letters, null if null String input
     * @see #initials(String)
     * @since 2.2
     */
    public static String initials(String str, char... delimiters) {
        if (StringUtils.isEmpty(str)) {
            return str;
        }
        if (delimiters != null && delimiters.length == 0) {
            return "";
        }
        int strLen = str.length();
        char[] buf = new char[strLen / 2 + 1];
        int count = 0;
        boolean lastWasGap = true;
        for (int i = 0; i < strLen; i++) {
            char ch = str.charAt(i);

            if (isDelimiter(ch, delimiters)) {
                lastWasGap = true;
            } else if (lastWasGap) {
                buf[count++] = ch;
                lastWasGap = false;
            } else {
                continue; // ignore ch
            }
        }
        return new String(buf, 0, count);
    }

    //-----------------------------------------------------------------------
    /**
     * Is the character a delimiter.
     *
     * @param ch  the character to check
     * @param delimiters  the delimiters
     * @return true if it is a delimiter
     */
    private static boolean isDelimiter(char ch, char[] delimiters) {
        if (delimiters == null) {
            return Character.isWhitespace(ch);
        }
        for (char delimiter : delimiters) {
            if (ch == delimiter) {
                return true;
            }
        }
        return false;
    }

}