
cn.hutool.dfa.SensitiveUtil Maven / Gradle / Ivy

Hutool is a small but comprehensive Java utility library. By wrapping common APIs in static methods it lowers their learning cost and improves productivity, giving Java some of the elegance of a functional language and making Java feel "sweet" to use.

package cn.hutool.dfa;

import cn.hutool.core.collection.CollUtil;
import cn.hutool.core.lang.Filter;
import cn.hutool.core.thread.ThreadUtil;
import cn.hutool.core.util.StrUtil;
import cn.hutool.json.JSONUtil;

import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Sensitive word utility class
 *
 * @author Looly
 */
public final class SensitiveUtil {

	public static final char DEFAULT_SEPARATOR = StrUtil.C_COMMA;
	private static final WordTree sensitiveTree = new WordTree();

	/**
	 * @return whether the word tree has already been initialized
	 */
	public static boolean isInited() {
		return false == sensitiveTree.isEmpty();
	}

	/**
	 * Initialize the sensitive word tree
	 *
	 * @param sensitiveWords collection of sensitive words
	 * @param isAsync        whether to initialize asynchronously
	 */
	public static void init(final Collection<String> sensitiveWords, boolean isAsync) {
		if (isAsync) {
			ThreadUtil.execAsync(() -> {
				init(sensitiveWords);
				return true;
			});
		} else {
			init(sensitiveWords);
		}
	}

	/**
	 * Initialize the sensitive word tree
	 *
	 * @param sensitiveWords collection of sensitive words
	 */
	public static void init(Collection<String> sensitiveWords) {
		sensitiveTree.clear();
		sensitiveTree.addWords(sensitiveWords);
//		log.debug("Sensitive init finished, sensitives: {}", sensitiveWords);
	}

	/**
	 * Initialize the sensitive word tree
	 *
	 * @param sensitiveWords string of sensitive words joined by the separator
	 * @param separator      separator character
	 * @param isAsync        whether to initialize asynchronously
	 */
	public static void init(String sensitiveWords, char separator, boolean isAsync) {
		if (StrUtil.isNotBlank(sensitiveWords)) {
			init(StrUtil.split(sensitiveWords, separator), isAsync);
		}
	}

	/**
	 * Initialize the sensitive word tree from a comma-separated string of words
	 *
	 * @param sensitiveWords comma-separated string of sensitive words
	 * @param isAsync        whether to initialize asynchronously
	 */
	public static void init(String sensitiveWords, boolean isAsync) {
		init(sensitiveWords, DEFAULT_SEPARATOR, isAsync);
	}

	/**
	 * Set the character filter rule; characters rejected by the filter are excluded from matching.
	 * When accept returns false, the character does not take part in matching.
	 *
	 * @param charFilter filter function
	 * @since 5.4.4
	 */
	public static void setCharFilter(Filter<Character> charFilter) {
		if (charFilter != null) {
			sensitiveTree.setCharFilter(charFilter);
		}
	}

	/**
	 * Check whether the text contains a sensitive word
	 *
	 * @param text text to check
	 * @return whether a sensitive word is contained
	 */
	public static boolean containsSensitive(String text) {
		return sensitiveTree.isMatch(text);
	}

	/**
	 * Check whether the object contains a sensitive word
	 *
	 * @param obj bean, converted to a JSON string before checking
	 * @return whether a sensitive word is contained
	 */
	public static boolean containsSensitive(Object obj) {
		return sensitiveTree.isMatch(JSONUtil.toJsonStr(obj));
	}

	/**
	 * Find sensitive words and return the first one found
	 *
	 * @param text text to check
	 * @return the sensitive word found
	 * @since 5.5.3
	 */
	public static FoundWord getFoundFirstSensitive(String text) {
		return sensitiveTree.matchWord(text);
	}

	/**
	 * Find sensitive words and return the first one found
	 *
	 * @param obj bean, converted to a JSON string before checking
	 * @return the sensitive word found
	 */
	public static FoundWord getFoundFirstSensitive(Object obj) {
		return sensitiveTree.matchWord(JSONUtil.toJsonStr(obj));
	}

	/**
	 * Find sensitive words and return all of them
	 *
	 * @param text text to check
	 * @return sensitive words found
	 * @since 5.5.3
	 */
	public static List<FoundWord> getFoundAllSensitive(String text) {
		return sensitiveTree.matchAllWords(text);
	}

	/**
	 * Find sensitive words and return all of them.
	 * Density matching: given the keywords ab and b and the text abab, the matches are [ab, b, ab].
	 * Greedy (longest) matching: given the keywords a and ab, the longest match yields [a, ab].
	 *
	 * @param text           text to check
	 * @param isDensityMatch whether to use density matching
	 * @param isGreedMatch   whether to use greedy (longest) matching
	 * @return sensitive words found
	 */
	public static List<FoundWord> getFoundAllSensitive(String text, boolean isDensityMatch, boolean isGreedMatch) {
		return sensitiveTree.matchAllWords(text, -1, isDensityMatch, isGreedMatch);
	}

	/**
	 * Find sensitive words and return all of them
	 *
	 * @param bean bean, converted to JSON before checking
	 * @return sensitive words found
	 * @since 5.5.3
	 */
	public static List<FoundWord> getFoundAllSensitive(Object bean) {
		return sensitiveTree.matchAllWords(JSONUtil.toJsonStr(bean));
	}

	/**
	 * Find sensitive words and return all of them.
	 * Density matching: given the keywords ab and b and the text abab, the matches are [ab, b, ab].
	 * Greedy (longest) matching: given the keywords a and ab, the longest match yields [a, ab].
	 *
	 * @param bean           bean, converted to JSON before checking
	 * @param isDensityMatch whether to use density matching
	 * @param isGreedMatch   whether to use greedy (longest) matching
	 * @return sensitive words found
	 * @since 5.5.3
	 */
	public static List<FoundWord> getFoundAllSensitive(Object bean, boolean isDensityMatch, boolean isGreedMatch) {
		return getFoundAllSensitive(JSONUtil.toJsonStr(bean), isDensityMatch, isGreedMatch);
	}

	/**
	 * Filter sensitive words in a bean
	 *
	 * @param bean               bean, converted to JSON before filtering
	 * @param isGreedMatch       greedy (longest) matching: given the keywords a and ab, the longest match yields [a, ab]
	 * @param sensitiveProcessor sensitive word processor; by default each matched character is replaced with *
	 * @param <T>                type of the bean
	 * @return the bean after sensitive word filtering
	 */
	public static <T> T sensitiveFilter(T bean, boolean isGreedMatch, SensitiveProcessor sensitiveProcessor) {
		final String jsonText = JSONUtil.toJsonStr(bean);
		@SuppressWarnings("unchecked")
		final Class<T> c = (Class<T>) bean.getClass();
		return JSONUtil.toBean(sensitiveFilter(jsonText, isGreedMatch, sensitiveProcessor), c);
	}

	/**
	 * Filter sensitive words in the text, replacing them with * by default
	 *
	 * @param text text to filter
	 * @return the text after sensitive word filtering
	 * @since 5.7.21
	 */
	public static String sensitiveFilter(String text) {
		return sensitiveFilter(text, true, null);
	}

	/**
	 * Filter sensitive words in the text, replacing them with * by default
	 *
	 * @param text               text to filter
	 * @param isGreedMatch       greedy (longest) matching: given the keywords a and ab, the longest match yields [a, ab]
	 * @param sensitiveProcessor sensitive word processor; by default each matched character is replaced with *
	 * @return the text after sensitive word filtering
	 */
	public static String sensitiveFilter(String text, boolean isGreedMatch, SensitiveProcessor sensitiveProcessor) {
		if (StrUtil.isEmpty(text)) {
			return text;
		}

		// density matching is not needed when filtering
		final List<FoundWord> foundWordList = getFoundAllSensitive(text, true, isGreedMatch);
		if (CollUtil.isEmpty(foundWordList)) {
			return text;
		}
		sensitiveProcessor = sensitiveProcessor == null ? new SensitiveProcessor() {
		} : sensitiveProcessor;

		// index found words by start position, then rebuild the text, replacing each match and skipping to its end index
		final Map<Integer, FoundWord> foundWordMap = new HashMap<>(foundWordList.size(), 1);
		foundWordList.forEach(foundWord -> foundWordMap.put(foundWord.getStartIndex(), foundWord));
		final int length = text.length();
		final StringBuilder textStringBuilder = new StringBuilder();
		for (int i = 0; i < length; i++) {
			final FoundWord fw = foundWordMap.get(i);
			if (fw != null) {
				textStringBuilder.append(sensitiveProcessor.process(fw));
				i = fw.getEndIndex();
			} else {
				textStringBuilder.append(text.charAt(i));
			}
		}
		return textStringBuilder.toString();
	}
}
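
For context, a minimal usage sketch (not part of the Hutool source): it assumes the Hutool core, dfa, and json modules are on the classpath, reuses the keywords from the Javadoc example above, and assumes FoundWord#getFoundWord() returns the matched text (only getStartIndex()/getEndIndex() appear in the listing itself).

import java.util.Arrays;
import java.util.List;

import cn.hutool.dfa.FoundWord;
import cn.hutool.dfa.SensitiveUtil;

public class SensitiveUtilExample {
	public static void main(String[] args) {
		// Build the word tree once (synchronously here); the words mirror the Javadoc example.
		SensitiveUtil.init(Arrays.asList("ab", "b"), false);

		// Containment check.
		System.out.println(SensitiveUtil.containsSensitive("abab")); // true

		// Density matching: per the Javadoc above, keywords ab and b against "abab" match [ab, b, ab].
		List<FoundWord> found = SensitiveUtil.getFoundAllSensitive("abab", true, false);
		// getFoundWord() is assumed to expose the matched text.
		found.forEach(w -> System.out.println(w.getFoundWord() + " @ " + w.getStartIndex()));

		// Default filtering replaces each matched character with '*', so "abab" becomes "****".
		System.out.println(SensitiveUtil.sensitiveFilter("abab"));
	}
}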



