All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.feilong.core.util.RegexUtil Maven / Gradle / Ivy

Go to download

feilong is a suite of core and extended libraries that includes utility classes, HTTP, Excel, CSV, and I/O classes, and much more.

There is a newer version: 4.0.8
Show newest version
/*
 * Copyright (C) 2008 feilong
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.feilong.core.util;

import static com.feilong.core.util.MapUtil.newLinkedHashMap;
import static java.util.Collections.emptyMap;

import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.feilong.core.Validate;

/**
 * 正则表达式工具类.
 *
 * @author feilong
 * @see "RegexPattern"
 * @see java.util.regex.Pattern
 * @see java.util.regex.Matcher
 * @see java.lang.String#matches(String)
 * @since 1.0.0
 * @since jdk1.4
 */
public final class RegexUtil{

    /** The Constant LOGGER. */
    private static final Logger               LOGGER        = LoggerFactory.getLogger(RegexUtil.class);

    /**
     * Pattern cache提高速度.
     *
     * @see RegexUtil 加上缓存
     * @since 1.10.6
     */
    private static final Map PATTERN_CACHE = new ConcurrentHashMap<>();

    //---------------------------------------------------------------

    /** Don't let anyone instantiate this class. */
    private RegexUtil(){
        //AssertionError不是必须的. 但它可以避免不小心在类的内部调用构造器. 保证该类在任何情况下都不会被实例化.
        //see 《Effective Java》 2nd
        throw new AssertionError("No " + getClass().getName() + " instances for you!");
    }

    //---------------------------------------------------------------

    /**
     * 编译给定正则表达式 regexPattern ,并尝试将给定输入 input 与其匹配.
     * 
     * 

* {@link Pattern#matches(String, CharSequence)} 等价于{@link #getMatcher(String, CharSequence)}.matches(); *

* * @param regexPattern * 正则表达式字符串,pls use RegexPattern * @param input * The character sequence to be matched,support {@link String},{@link StringBuffer},{@link StringBuilder}... and so on * @return 如果 regexPattern 是null,抛出 {@link NullPointerException}
* 如果 input 是null,返回 false
* 如果input 符合 regex的正则表达式格式,返回true,否则返回 false;
* @see #getMatcher(String, CharSequence) * @see Matcher#matches() * @see Pattern#matches(String, CharSequence) * @since 1.0.7 * @since 1.13.2 change, if input is null, return false from NPE */ public static boolean matches(final String regexPattern,final CharSequence input){ Validate.notNull(regexPattern, "regexPattern can't be null!"); if (null == input){ return false; } return getMatcher(regexPattern, input).matches(); } //--------------------------------------------------------------- /** * 返回在以前匹配操作期间由给定组捕获的输入子序列. * *

* 对于匹配器 m、输入序列 s 和组索引 g,表达式 m.group(g) 和 s.substring(m.start(g), m.end(g))是等效的.
* 捕获组是从 1开始从左到右的索引.组0表示整个模式,因此表达式 m.group(0)等效于 m.group(). *

* *

示例:

* *
* *
     * String regexPattern = "(.*?)@(.*?)";
     * String email = "[email protected]";
     * 
     * RegexUtil.group(regexPattern, email);
     * 
* * 返回: * *
     *    0 [email protected]
     *    1 feilong
     *    2 163.com
     * 
* *
* * @param regexPattern * 正则表达式模式,比如 (.*?)@(.*?) * @param input * 需要被group的字符串,比如 [email protected],support {@link String},{@link StringBuffer},{@link StringBuilder}... and so on * @return 如果 regexPattern 是null,抛出 {@link NullPointerException}
* 如果 input 是null,抛出 {@link NullPointerException}
* 如果 匹配不了,返回 {@link java.util.Collections#emptyMap()} * @see #getMatcher(String, CharSequence) * @see Matcher#group(int) * @since 1.0.7 */ public static Map group(final String regexPattern,final CharSequence input){ Matcher matcher = getMatcher(regexPattern, input); if (!matcher.matches()){ LOGGER.trace("[not matches] ,\n\tregexPattern:[{}] \n\tinput:[{}]", regexPattern, input); return emptyMap(); } //--------------------------------------------------------------- int groupCount = matcher.groupCount(); Map map = newLinkedHashMap(groupCount + 1); for (int i = 0; i <= groupCount; ++i){ //匹配的索引 String groupValue = matcher.group(i); //map.put(0, matcher.group());// 捕获组是从 1 开始从左到右的索引.组0表示整个模式,因此表达式 m.group(0) 等效于 m.group(). LOGGER.trace("matcher group[{}],start-end:[{}-{}],groupValue:[{}]", i, matcher.start(i), matcher.end(i), groupValue); map.put(i, groupValue);//groupValue } //--------------------------------------------------------------- if (LOGGER.isTraceEnabled()){ LOGGER.trace("regexPattern:[{}],input:[{}],groupMap:{}", regexPattern, input, map); } return map; } /** * 返回在以前匹配操作期间由给定组捕获的输入子序列. * *

* 对于匹配器 m、输入序列 s 和组索引 g,表达式 m.group(g) 和 s.substring(m.start(g), m.end(g))是等效的.
* 捕获组是从 1开始从左到右的索引.组0表示整个模式,因此表达式 m.group(0)等效于 m.group(). *

* *

示例:

* *
* *
     * 
     * String regexPattern = "(.*?)@(.*?)";
     * String email = "[email protected]";
     * 
     * RegexUtil.group(regexPattern, email, 1);//feilong
     * RegexUtil.group(regexPattern, email, 2);//163.com
     * 
     * 
* *
* * @param regexPattern * 正则表达式模式,比如 (.*?)@(.*?) * @param input * 需要被group的字符串,比如 [email protected],support {@link String},{@link StringBuffer},{@link StringBuilder}... and so on * @param groupNo * 组号,从0开始 * @return 如果 regexPattern 是null,抛出 {@link NullPointerException}
* 如果 input 是null,抛出 {@link NullPointerException}
* 如果 {@code input < 0} ,抛出 {@link IllegalArgumentException}
* @see #getMatcher(String, CharSequence) * @see Matcher#group(int) * @since 1.0.7 */ public static String group(final String regexPattern,final CharSequence input,final int groupNo){ Validate.isTrue(groupNo >= 0, "groupNo must >=0"); Map map = group(regexPattern, input); return map.get(groupNo); } //--------------------------------------------------------------- /** * Gets the matcher. * * @param regexPattern * 正则表达式字符串,pls use {@link "RegexPattern"} * @param input * The character sequence to be matched,support {@link String},{@link StringBuffer},{@link StringBuilder}... and so on * @return 如果 regexPattern 是null,抛出 {@link NullPointerException}
* 如果 input 是null,抛出 {@link NullPointerException}
* @see Pattern#compile(String) * @since 1.0.7 */ private static Matcher getMatcher(final String regexPattern,final CharSequence input){ return getMatcher(regexPattern, input, 0); } /** * Gets the matcher. * * @param regexPattern * 正则表达式字符串,pls use {@link "RegexPattern"} * @param input * The character sequence to be matched,support {@link String},{@link StringBuffer},{@link StringBuilder}... and so on * @param flags * 如果需要多个组合,可以使用罗辑或 *
Pattern.compile(regex, CASE_INSENSITIVE | DOTALL);
*
    *
  • {@link Pattern#CASE_INSENSITIVE} 匹配字符时与大小写无关,该标志默认只考虑US ASCII字符.
  • *
  • {@link Pattern#MULTILINE} ^和$匹配一行的开始和结尾,而不是整个输入
  • *
  • {@link Pattern#UNICODE_CASE} 当与CASE_INSENSITIVE结合时,使用Unicode字母匹配
  • *
  • {@link Pattern#CANON_EQ} 考虑Unicode字符的规范等价
  • *
  • {@link Pattern#DOTALL} 当使用此标志时,.符号匹配包括行终止符在内的所有字符
  • *
  • {@link Pattern#UNIX_LINES} 当在多行模式下匹配^和$时,只将'\n'看作行终止符
  • *
  • {@link Pattern#LITERAL} 启用模式的字面值解析.
  • *
  • {@link Pattern#COMMENTS} 模式中允许空白和注释.
    * 此模式将忽略空白和在结束行之前以 # 开头的嵌入式注释.
    * 通过嵌入式标志表达式 (?x) 也可以启用注释模式.
    *
  • *
* @return 如果 regexPattern 是null,抛出 {@link NullPointerException}
* 如果 input 是null,抛出 {@link NullPointerException}
* @see Pattern#compile(String, int) * @since 1.5.3 */ private static Matcher getMatcher(final String regexPattern,final CharSequence input,final int flags){ Validate.notNull(regexPattern, "regexPattern can't be null!"); Validate.notNull(input, "input can't be null!"); Pattern pattern = buildPattern(regexPattern, flags); return pattern.matcher(input); } //--------------------------------------------------------------- /** * 如果cache中有,那么直接返回, 如果没有构造一个并塞到cache中. * * @param regexPattern * the regex pattern * @param flags * the flags * @return the pattern * @since 1.10.6 */ public static Pattern buildPattern(final String regexPattern,final int flags){ String key = regexPattern + "@" + flags; return PATTERN_CACHE.computeIfAbsent(key, k -> Pattern.compile(regexPattern, flags)); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy