All downloads are free. Search and download functionality uses the official Maven repository.

com.taotao.boot.sensitive.wordother.bs.SensitiveWordBs Maven / Gradle / Ivy

/*
 * Copyright (c) 2020-2030, Shuigedeng ([email protected] & https://blog.taotaocloud.top/).
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.taotao.boot.sensitive.wordother.bs;

import com.taotao.boot.common.constant.CommonConstant;
import com.taotao.boot.common.support.handler.IHandler;
import com.taotao.boot.common.utils.collection.CollectionUtils;
import com.taotao.boot.common.utils.common.ArgUtils;
import com.taotao.boot.sensitive.wordother.api.ISensitiveWordReplace;
import com.taotao.boot.sensitive.wordother.api.IWordAllow;
import com.taotao.boot.sensitive.wordother.api.IWordContext;
import com.taotao.boot.sensitive.wordother.api.IWordDeny;
import com.taotao.boot.sensitive.wordother.api.IWordMap;
import com.taotao.boot.sensitive.wordother.api.IWordResult;
import com.taotao.boot.sensitive.wordother.api.IWordResultHandler;
import com.taotao.boot.sensitive.wordother.support.allow.WordAllows;
import com.taotao.boot.sensitive.wordother.support.deny.WordDenys;
import com.taotao.boot.sensitive.wordother.support.map.SensitiveWordMap;
import com.taotao.boot.sensitive.wordother.support.replace.SensitiveWordReplaceChar;
import com.taotao.boot.sensitive.wordother.support.result.WordResultHandlers;
import com.taotao.boot.sensitive.wordother.utils.InnerFormatUtils;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

/** 敏感词引导类 */
public class SensitiveWordBs {

    /** 私有化构造器 */
    private SensitiveWordBs() {}

    /** 敏感词 map */
    private IWordMap sensitiveWordMap;

    /** 默认的执行上下文 */
    private final IWordContext context = buildDefaultContext();

    /** 禁止的单词 */
    private IWordDeny wordDeny = WordDenys.system();

    /** 允许的单词 */
    private IWordAllow wordAllow = WordAllows.system();

    /**
     * DCL 初始化 wordMap 信息
     *
     * 

注意:map 的构建是一个比较耗时的动作 */ private synchronized void initWordMap() { // 加载配置信息 List denyList = wordDeny.deny(); List allowList = wordAllow.allow(); List results = getActualDenyList(denyList, allowList); // 初始化 DFA 信息 if (sensitiveWordMap == null) { sensitiveWordMap = new SensitiveWordMap(); } // 便于可以多次初始化 sensitiveWordMap.initWordMap(results); } /** * 获取禁止列表中真正的禁止词汇 * * @param denyList 禁止 * @param allowList 允许 * @return 结果 */ List getActualDenyList(List denyList, List allowList) { if (CollectionUtils.isEmpty(denyList)) { return Collections.emptyList(); } if (CollectionUtils.isEmpty(allowList)) { return denyList; } List formatDenyList = this.formatWordList(denyList); List formatAllowList = this.formatWordList(allowList); List resultList = new ArrayList<>(); // O(1) Set allowSet = new HashSet<>(formatAllowList); for (String deny : formatDenyList) { if (allowSet.contains(deny)) { continue; } resultList.add(deny); } return resultList; } /** * 数据格式化处理 * * @param list 列表 * @return 结果 */ private List formatWordList(List list) { if (CollectionUtils.isEmpty(list)) { return list; } List resultList = new ArrayList<>(list.size()); for (String word : list) { String formatWord = InnerFormatUtils.format(word, this.context); resultList.add(formatWord); } return resultList; } /** * 新建验证实例 * *

double-lock * * @return this */ public static SensitiveWordBs newInstance() { return new SensitiveWordBs(); } /** * 初始化 * *

1. 根据配置,初始化对应的 map。比较消耗性能。 * * @return this */ public SensitiveWordBs init() { this.initWordMap(); return this; } /** * 设置禁止的实现 * * @param wordDeny 禁止的实现 * @return this */ public SensitiveWordBs wordDeny(IWordDeny wordDeny) { ArgUtils.notNull(wordDeny, "wordDeny"); this.wordDeny = wordDeny; return this; } /** * 设置允许的实现 * * @param wordAllow 允许的实现 * @return this */ public SensitiveWordBs wordAllow(IWordAllow wordAllow) { ArgUtils.notNull(wordAllow, "wordAllow"); this.wordAllow = wordAllow; return this; } /** * 设置是否启动数字检测 * * @param enableNumCheck 数字检测 * @return this */ public SensitiveWordBs enableNumCheck(boolean enableNumCheck) { this.context.sensitiveCheckNum(enableNumCheck); return this; } /** * 设置是否启动 email 检测 * * @param enableEmailCheck email 检测 * @return this */ public SensitiveWordBs enableEmailCheck(boolean enableEmailCheck) { this.context.sensitiveCheckEmail(enableEmailCheck); return this; } /** * 设置是否启动 url 检测 * * @param enableUrlCheck url 检测 * @return this */ public SensitiveWordBs enableUrlCheck(boolean enableUrlCheck) { this.context.sensitiveCheckUrl(enableUrlCheck); return this; } /** * 是否忽略大小写 * * @param ignoreCase 大小写 * @return this */ public SensitiveWordBs ignoreCase(boolean ignoreCase) { this.context.ignoreCase(ignoreCase); return this; } /** * 是否忽略半角全角 * * @param ignoreWidth 半角全角 * @return this */ public SensitiveWordBs ignoreWidth(boolean ignoreWidth) { this.context.ignoreWidth(ignoreWidth); return this; } /** * 是否忽略数字格式 * * @param ignoreNumStyle 数字格式 * @return this */ public SensitiveWordBs ignoreNumStyle(boolean ignoreNumStyle) { this.context.ignoreNumStyle(ignoreNumStyle); return this; } /** * 是否忽略中文样式 * * @param ignoreChineseStyle 中文样式 * @return this */ public SensitiveWordBs ignoreChineseStyle(boolean ignoreChineseStyle) { this.context.ignoreChineseStyle(ignoreChineseStyle); return this; } /** * 是否忽略英文样式 * * @param ignoreEnglishStyle 英文样式 * @return this */ public SensitiveWordBs ignoreEnglishStyle(boolean ignoreEnglishStyle) { 
this.context.ignoreEnglishStyle(ignoreEnglishStyle); return this; } /** * 是否忽略重复 * * @param ignoreRepeat 忽略重复 * @return this */ public SensitiveWordBs ignoreRepeat(boolean ignoreRepeat) { this.context.ignoreRepeat(ignoreRepeat); return this; } /** * 构建默认的上下文 * * @return 结果 */ private IWordContext buildDefaultContext() { IWordContext wordContext = SensitiveWordContext.newInstance(); // 格式统一化 wordContext.ignoreCase(true); wordContext.ignoreWidth(true); wordContext.ignoreNumStyle(true); wordContext.ignoreChineseStyle(true); wordContext.ignoreEnglishStyle(true); wordContext.ignoreRepeat(false); // 开启校验 wordContext.sensitiveCheckNum(true); wordContext.sensitiveCheckEmail(true); wordContext.sensitiveCheckUrl(true); return wordContext; } /** * 是否包含敏感词 * * @param target 目标字符串 * @return 是否 */ public boolean contains(final String target) { statusCheck(); return sensitiveWordMap.contains(target, context); } /** * 返回所有的敏感词 1. 这里是默认去重的,且是有序的。 2. 如果不存在,返回空列表 * * @param target 目标字符串 * @return 敏感词列表 */ public List findAll(final String target) { return findAll(target, WordResultHandlers.word()); } /** * 返回第一个敏感词 (1)如果不存在,则返回 {@code null} * * @param target 目标字符串 * @return 敏感词 */ public String findFirst(final String target) { return findFirst(target, WordResultHandlers.word()); } /** * 返回所有的敏感词 1. 这里是默认去重的,且是有序的。 2. 
如果不存在,返回空列表 * * @param target 目标字符串 * @param 泛型 * @param handler 处理类 * @return 敏感词列表 */ public List findAll(final String target, final IWordResultHandler handler) { ArgUtils.notNull(handler, "handler"); statusCheck(); List wordResults = sensitiveWordMap.findAll(target, context); return CollectionUtils.toList(wordResults, new IHandler() { @Override public R handle(IWordResult wordResult) { return handler.handle(wordResult); } }); } /** * 返回第一个敏感词 (1)如果不存在,则返回 {@code null} * * @param target 目标字符串 * @param handler 处理类 * @param 泛型 * @return 敏感词 */ public R findFirst(final String target, final IWordResultHandler handler) { ArgUtils.notNull(handler, "handler"); statusCheck(); IWordResult wordResult = sensitiveWordMap.findFirst(target, context); return handler.handle(wordResult); } /** * 替换所有内容 * * @param target 目标字符串 * @param replaceChar 替换为的 char * @return 替换后结果 */ public String replace(final String target, final char replaceChar) { ISensitiveWordReplace replace = new SensitiveWordReplaceChar(replaceChar); return replace(target, replace); } /** * 替换所有内容 * * @param target 目标字符串 * @param replace 替换策略 * @return 替换后结果 */ public String replace(final String target, final ISensitiveWordReplace replace) { statusCheck(); return sensitiveWordMap.replace(target, replace, context); } /** * 替换所有内容 1. 默认使用空格替换,避免星号改变 md 的格式。 * * @param target 目标字符串 * @return 替换后结果 */ public String replace(final String target) { return this.replace(target, CommonConstant.STAR); } /** 状态校验 */ private void statusCheck() { // DLC if (sensitiveWordMap == null) { synchronized (this) { if (sensitiveWordMap == null) { this.init(); } } } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy