All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.landawn.abacus.util.RegExUtil Maven / Gradle / Ivy

There is a newer version: 1.10.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.landawn.abacus.util;

import java.util.regex.Pattern;

/**
 * Note: Copied from Apache Commons Lang under Apache License V2.
 * 
 * 
* *

Helpers to process Strings using regular expressions.

* @see java.util.regex.Pattern * @since 3.8 */ public final class RegExUtil { private RegExUtil() { // Singleton for utility class. } /** *

Removes each substring of the text String that matches the given regular expression pattern.

* * This method is a {@code null} safe equivalent to: *
    *
  • {@code pattern.matcher(text).replaceAll(N.EMPTY_STRING)}
  • *
* *

A {@code null} reference passed to this method is a no-op.

* *
     * StringUtils.removeAll(null, *)      = null
     * StringUtils.removeAll("any", (Pattern) null)  = "any"
     * StringUtils.removeAll("any", Pattern.compile(""))    = "any"
     * StringUtils.removeAll("any", Pattern.compile(".*"))  = ""
     * StringUtils.removeAll("any", Pattern.compile(".+"))  = ""
     * StringUtils.removeAll("abc", Pattern.compile(".?"))  = ""
     * StringUtils.removeAll("A<__>\n<__>B", Pattern.compile("<.*>"))      = "A\nB"
     * StringUtils.removeAll("A<__>\n<__>B", Pattern.compile("(?s)<.*>"))  = "AB"
     * StringUtils.removeAll("A<__>\n<__>B", Pattern.compile("<.*>", Pattern.DOTALL))  = "AB"
     * StringUtils.removeAll("ABCabc123abc", Pattern.compile("[a-z]"))     = "ABC123"
     * 
* * @param text text to remove from, may be null * @param regex the regular expression to which this string is to be matched * @return the text with any removes processed, * {@code null} if null String input * * @see #replaceAll(String, Pattern, String) * @see java.util.regex.Matcher#replaceAll(String) * @see java.util.regex.Pattern */ public static String removeAll(final String text, final Pattern regex) { return replaceAll(text, regex, N.EMPTY_STRING); } /** *

Removes each substring of the text String that matches the given regular expression.

* * This method is a {@code null} safe equivalent to: *
    *
  • {@code text.replaceAll(regex, N.EMPTY_STRING)}
  • *
  • {@code Pattern.compile(regex).matcher(text).replaceAll(N.EMPTY_STRING)}
  • *
* *

A {@code null} reference passed to this method is a no-op.

* *

Unlike in the {@link #removePattern(String, String)} method, the {@link Pattern#DOTALL} option * is NOT automatically added. * To use the DOTALL option prepend "(?s)" to the regex. * DOTALL is also known as single-line mode in Perl.

* *
     * StringUtils.removeAll(null, *)      = null
     * StringUtils.removeAll("any", (String) null)  = "any"
     * StringUtils.removeAll("any", "")    = "any"
     * StringUtils.removeAll("any", ".*")  = ""
     * StringUtils.removeAll("any", ".+")  = ""
     * StringUtils.removeAll("abc", ".?")  = ""
     * StringUtils.removeAll("A<__>\n<__>B", "<.*>")      = "A\nB"
     * StringUtils.removeAll("A<__>\n<__>B", "(?s)<.*>")  = "AB"
     * StringUtils.removeAll("ABCabc123abc", "[a-z]")     = "ABC123"
     * 
* * @param text text to remove from, may be null * @param regex the regular expression to which this string is to be matched * @return the text with any removes processed, * {@code null} if null String input * * @throws java.util.regex.PatternSyntaxException * if the regular expression's syntax is invalid * * @see #replaceAll(String, String, String) * @see #removePattern(String, String) * @see String#replaceAll(String, String) * @see java.util.regex.Pattern * @see java.util.regex.Pattern#DOTALL */ public static String removeAll(final String text, final String regex) { return replaceAll(text, regex, N.EMPTY_STRING); } /** *

Removes the first substring of the text string that matches the given regular expression pattern.

* * This method is a {@code null} safe equivalent to: *
    *
  • {@code pattern.matcher(text).replaceFirst(N.EMPTY_STRING)}
  • *
* *

A {@code null} reference passed to this method is a no-op.

* *
     * StringUtils.removeFirst(null, *)      = null
     * StringUtils.removeFirst("any", (Pattern) null)  = "any"
     * StringUtils.removeFirst("any", Pattern.compile(""))    = "any"
     * StringUtils.removeFirst("any", Pattern.compile(".*"))  = ""
     * StringUtils.removeFirst("any", Pattern.compile(".+"))  = ""
     * StringUtils.removeFirst("abc", Pattern.compile(".?"))  = "bc"
     * StringUtils.removeFirst("A<__>\n<__>B", Pattern.compile("<.*>"))      = "A\n<__>B"
     * StringUtils.removeFirst("A<__>\n<__>B", Pattern.compile("(?s)<.*>"))  = "AB"
     * StringUtils.removeFirst("ABCabc123", Pattern.compile("[a-z]"))          = "ABCbc123"
     * StringUtils.removeFirst("ABCabc123abc", Pattern.compile("[a-z]+"))      = "ABC123abc"
     * 
* * @param text text to remove from, may be null * @param regex the regular expression pattern to which this string is to be matched * @return the text with the first replacement processed, * {@code null} if null String input * * @see #replaceFirst(String, Pattern, String) * @see java.util.regex.Matcher#replaceFirst(String) * @see java.util.regex.Pattern */ public static String removeFirst(final String text, final Pattern regex) { return replaceFirst(text, regex, N.EMPTY_STRING); } /** *

Removes the first substring of the text string that matches the given regular expression.

* * This method is a {@code null} safe equivalent to: *
    *
  • {@code text.replaceFirst(regex, N.EMPTY_STRING)}
  • *
  • {@code Pattern.compile(regex).matcher(text).replaceFirst(N.EMPTY_STRING)}
  • *
* *

A {@code null} reference passed to this method is a no-op.

* *

The {@link Pattern#DOTALL} option is NOT automatically added. * To use the DOTALL option prepend "(?s)" to the regex. * DOTALL is also known as single-line mode in Perl.

* *
     * StringUtils.removeFirst(null, *)      = null
     * StringUtils.removeFirst("any", (String) null)  = "any"
     * StringUtils.removeFirst("any", "")    = "any"
     * StringUtils.removeFirst("any", ".*")  = ""
     * StringUtils.removeFirst("any", ".+")  = ""
     * StringUtils.removeFirst("abc", ".?")  = "bc"
     * StringUtils.removeFirst("A<__>\n<__>B", "<.*>")      = "A\n<__>B"
     * StringUtils.removeFirst("A<__>\n<__>B", "(?s)<.*>")  = "AB"
     * StringUtils.removeFirst("ABCabc123", "[a-z]")          = "ABCbc123"
     * StringUtils.removeFirst("ABCabc123abc", "[a-z]+")      = "ABC123abc"
     * 
* * @param text text to remove from, may be null * @param regex the regular expression to which this string is to be matched * @return the text with the first replacement processed, * {@code null} if null String input * * @throws java.util.regex.PatternSyntaxException * if the regular expression's syntax is invalid * * @see #replaceFirst(String, String, String) * @see String#replaceFirst(String, String) * @see java.util.regex.Pattern * @see java.util.regex.Pattern#DOTALL */ public static String removeFirst(final String text, final String regex) { return replaceFirst(text, regex, N.EMPTY_STRING); } /** *

Removes each substring of the source String that matches the given regular expression using the DOTALL option.

* * This call is a {@code null} safe equivalent to: *
    *
  • {@code text.replaceAll("(?s)" + regex, N.EMPTY_STRING)}
  • *
  • {@code Pattern.compile(regex, Pattern.DOTALL).matcher(text).replaceAll(N.EMPTY_STRING)}
  • *
* *

A {@code null} reference passed to this method is a no-op.

* *
     * StringUtils.removePattern(null, *)       = null
     * StringUtils.removePattern("any", (String) null)   = "any"
     * StringUtils.removePattern("A<__>\n<__>B", "<.*>")  = "AB"
     * StringUtils.removePattern("ABCabc123", "[a-z]")    = "ABC123"
     * 
* * @param text * the source string * @param regex * the regular expression to which this string is to be matched * @return The resulting {@code String} * @see #replacePattern(String, String, String) * @see String#replaceAll(String, String) * @see Pattern#DOTALL */ public static String removePattern(final String text, final String regex) { return replacePattern(text, regex, N.EMPTY_STRING); } /** *

Replaces the first substring of the text string that matches the given regular expression pattern * with the given replacement.

* * This method is a {@code null} safe equivalent to: *
    *
  • {@code pattern.matcher(text).replaceFirst(replacement)}
  • *
* *

A {@code null} reference passed to this method is a no-op.

* *
     * StringUtils.replaceFirst(null, *, *)       = null
     * StringUtils.replaceFirst("any", (Pattern) null, *)   = "any"
     * StringUtils.replaceFirst("any", *, null)   = "any"
     * StringUtils.replaceFirst("", Pattern.compile(""), "zzz")    = "zzz"
     * StringUtils.replaceFirst("", Pattern.compile(".*"), "zzz")  = "zzz"
     * StringUtils.replaceFirst("", Pattern.compile(".+"), "zzz")  = ""
     * StringUtils.replaceFirst("abc", Pattern.compile(""), "ZZ")  = "ZZabc"
     * StringUtils.replaceFirst("<__>\n<__>", Pattern.compile("<.*>"), "z")      = "z\n<__>"
     * StringUtils.replaceFirst("<__>\n<__>", Pattern.compile("(?s)<.*>"), "z")  = "z"
     * StringUtils.replaceFirst("ABCabc123", Pattern.compile("[a-z]"), "_")          = "ABC_bc123"
     * StringUtils.replaceFirst("ABCabc123abc", Pattern.compile("[^A-Z0-9]+"), "_")  = "ABC_123abc"
     * StringUtils.replaceFirst("ABCabc123abc", Pattern.compile("[^A-Z0-9]+"), "")   = "ABC123abc"
     * StringUtils.replaceFirst("Lorem ipsum  dolor   sit", Pattern.compile("( +)([a-z]+)"), "_$2")  = "Lorem_ipsum  dolor   sit"
     * 
* * @param text text to search and replace in, may be null * @param regex the regular expression pattern to which this string is to be matched * @param replacement the string to be substituted for the first match * @return the text with the first replacement processed, * {@code null} if null String input * * @see java.util.regex.Matcher#replaceFirst(String) * @see java.util.regex.Pattern */ public static String replaceFirst(final String text, final Pattern regex, final String replacement) { if (text == null || regex == null || replacement == null) { return text; } return regex.matcher(text).replaceFirst(replacement); } /** *

Replaces the first substring of the text string that matches the given regular expression * with the given replacement.

* * This method is a {@code null} safe equivalent to: *
    *
  • {@code text.replaceFirst(regex, replacement)}
  • *
  • {@code Pattern.compile(regex).matcher(text).replaceFirst(replacement)}
  • *
* *

A {@code null} reference passed to this method is a no-op.

* *

The {@link Pattern#DOTALL} option is NOT automatically added. * To use the DOTALL option prepend "(?s)" to the regex. * DOTALL is also known as single-line mode in Perl.

* *
     * StringUtils.replaceFirst(null, *, *)       = null
     * StringUtils.replaceFirst("any", (String) null, *)   = "any"
     * StringUtils.replaceFirst("any", *, null)   = "any"
     * StringUtils.replaceFirst("", "", "zzz")    = "zzz"
     * StringUtils.replaceFirst("", ".*", "zzz")  = "zzz"
     * StringUtils.replaceFirst("", ".+", "zzz")  = ""
     * StringUtils.replaceFirst("abc", "", "ZZ")  = "ZZabc"
     * StringUtils.replaceFirst("<__>\n<__>", "<.*>", "z")      = "z\n<__>"
     * StringUtils.replaceFirst("<__>\n<__>", "(?s)<.*>", "z")  = "z"
     * StringUtils.replaceFirst("ABCabc123", "[a-z]", "_")          = "ABC_bc123"
     * StringUtils.replaceFirst("ABCabc123abc", "[^A-Z0-9]+", "_")  = "ABC_123abc"
     * StringUtils.replaceFirst("ABCabc123abc", "[^A-Z0-9]+", "")   = "ABC123abc"
     * StringUtils.replaceFirst("Lorem ipsum  dolor   sit", "( +)([a-z]+)", "_$2")  = "Lorem_ipsum  dolor   sit"
     * 
* * @param text text to search and replace in, may be null * @param regex the regular expression to which this string is to be matched * @param replacement the string to be substituted for the first match * @return the text with the first replacement processed, * {@code null} if null String input * * @throws java.util.regex.PatternSyntaxException * if the regular expression's syntax is invalid * * @see String#replaceFirst(String, String) * @see java.util.regex.Pattern * @see java.util.regex.Pattern#DOTALL */ public static String replaceFirst(final String text, final String regex, final String replacement) { if (text == null || regex == null || replacement == null) { return text; } return text.replaceFirst(regex, replacement); } /** *

Replaces each substring of the text String that matches the given regular expression pattern with the given replacement.

* * This method is a {@code null} safe equivalent to: *
    *
  • {@code pattern.matcher(text).replaceAll(replacement)}
  • *
* *

A {@code null} reference passed to this method is a no-op.

* *
     * StringUtils.replaceAll(null, *, *)       = null
     * StringUtils.replaceAll("any", (Pattern) null, *)   = "any"
     * StringUtils.replaceAll("any", *, null)   = "any"
     * StringUtils.replaceAll("", Pattern.compile(""), "zzz")    = "zzz"
     * StringUtils.replaceAll("", Pattern.compile(".*"), "zzz")  = "zzz"
     * StringUtils.replaceAll("", Pattern.compile(".+"), "zzz")  = ""
     * StringUtils.replaceAll("abc", Pattern.compile(""), "ZZ")  = "ZZaZZbZZcZZ"
     * StringUtils.replaceAll("<__>\n<__>", Pattern.compile("<.*>"), "z")                 = "z\nz"
     * StringUtils.replaceAll("<__>\n<__>", Pattern.compile("<.*>", Pattern.DOTALL), "z") = "z"
     * StringUtils.replaceAll("<__>\n<__>", Pattern.compile("(?s)<.*>"), "z")             = "z"
     * StringUtils.replaceAll("ABCabc123", Pattern.compile("[a-z]"), "_")       = "ABC___123"
     * StringUtils.replaceAll("ABCabc123", Pattern.compile("[^A-Z0-9]+"), "_")  = "ABC_123"
     * StringUtils.replaceAll("ABCabc123", Pattern.compile("[^A-Z0-9]+"), "")   = "ABC123"
     * StringUtils.replaceAll("Lorem ipsum  dolor   sit", Pattern.compile("( +)([a-z]+)"), "_$2")  = "Lorem_ipsum_dolor_sit"
     * 
* * @param text text to search and replace in, may be null * @param regex the regular expression pattern to which this string is to be matched * @param replacement the string to be substituted for each match * @return the text with any replacements processed, * {@code null} if null String input * * @see java.util.regex.Matcher#replaceAll(String) * @see java.util.regex.Pattern */ public static String replaceAll(final String text, final Pattern regex, final String replacement) { if (text == null || regex == null || replacement == null) { return text; } return regex.matcher(text).replaceAll(replacement); } /** *

Replaces each substring of the text String that matches the given regular expression * with the given replacement.

* * This method is a {@code null} safe equivalent to: *
    *
  • {@code text.replaceAll(regex, replacement)}
  • *
  • {@code Pattern.compile(regex).matcher(text).replaceAll(replacement)}
  • *
* *

A {@code null} reference passed to this method is a no-op.

* *

Unlike in the {@link #replacePattern(String, String, String)} method, the {@link Pattern#DOTALL} option * is NOT automatically added. * To use the DOTALL option prepend "(?s)" to the regex. * DOTALL is also known as single-line mode in Perl.

* *
     * StringUtils.replaceAll(null, *, *)       = null
     * StringUtils.replaceAll("any", (String) null, *)   = "any"
     * StringUtils.replaceAll("any", *, null)   = "any"
     * StringUtils.replaceAll("", "", "zzz")    = "zzz"
     * StringUtils.replaceAll("", ".*", "zzz")  = "zzz"
     * StringUtils.replaceAll("", ".+", "zzz")  = ""
     * StringUtils.replaceAll("abc", "", "ZZ")  = "ZZaZZbZZcZZ"
     * StringUtils.replaceAll("<__>\n<__>", "<.*>", "z")      = "z\nz"
     * StringUtils.replaceAll("<__>\n<__>", "(?s)<.*>", "z")  = "z"
     * StringUtils.replaceAll("ABCabc123", "[a-z]", "_")       = "ABC___123"
     * StringUtils.replaceAll("ABCabc123", "[^A-Z0-9]+", "_")  = "ABC_123"
     * StringUtils.replaceAll("ABCabc123", "[^A-Z0-9]+", "")   = "ABC123"
     * StringUtils.replaceAll("Lorem ipsum  dolor   sit", "( +)([a-z]+)", "_$2")  = "Lorem_ipsum_dolor_sit"
     * 
* * @param text text to search and replace in, may be null * @param regex the regular expression to which this string is to be matched * @param replacement the string to be substituted for each match * @return the text with any replacements processed, * {@code null} if null String input * * @throws java.util.regex.PatternSyntaxException * if the regular expression's syntax is invalid * * @see #replacePattern(String, String, String) * @see String#replaceAll(String, String) * @see java.util.regex.Pattern * @see java.util.regex.Pattern#DOTALL */ public static String replaceAll(final String text, final String regex, final String replacement) { if (text == null || regex == null || replacement == null) { return text; } return text.replaceAll(regex, replacement); } /** *

Replaces each substring of the source String that matches the given regular expression with the given * replacement using the {@link Pattern#DOTALL} option. DOTALL is also known as single-line mode in Perl.

* * This call is a {@code null} safe equivalent to: *
    *
  • {@code text.replaceAll("(?s)" + regex, replacement)}
  • *
  • {@code Pattern.compile(regex, Pattern.DOTALL).matcher(text).replaceAll(replacement)}
  • *
* *

A {@code null} reference passed to this method is a no-op.

* *
     * StringUtils.replacePattern(null, *, *)       = null
     * StringUtils.replacePattern("any", (String) null, *)   = "any"
     * StringUtils.replacePattern("any", *, null)   = "any"
     * StringUtils.replacePattern("", "", "zzz")    = "zzz"
     * StringUtils.replacePattern("", ".*", "zzz")  = "zzz"
     * StringUtils.replacePattern("", ".+", "zzz")  = ""
     * StringUtils.replacePattern("<__>\n<__>", "<.*>", "z")       = "z"
     * StringUtils.replacePattern("ABCabc123", "[a-z]", "_")       = "ABC___123"
     * StringUtils.replacePattern("ABCabc123", "[^A-Z0-9]+", "_")  = "ABC_123"
     * StringUtils.replacePattern("ABCabc123", "[^A-Z0-9]+", "")   = "ABC123"
     * StringUtils.replacePattern("Lorem ipsum  dolor   sit", "( +)([a-z]+)", "_$2")  = "Lorem_ipsum_dolor_sit"
     * 
* * @param text * the source string * @param regex * the regular expression to which this string is to be matched * @param replacement * the string to be substituted for each match * @return The resulting {@code String} * @see #replaceAll(String, String, String) * @see String#replaceAll(String, String) * @see Pattern#DOTALL */ public static String replacePattern(final String text, final String regex, final String replacement) { if (text == null || regex == null || replacement == null) { return text; } return Pattern.compile(regex, Pattern.DOTALL).matcher(text).replaceAll(replacement); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy