All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.datastax.util.web.HtmlUtil Maven / Gradle / Ivy

package com.datastax.util.web;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Regex-based helpers for escaping, stripping and inspecting HTML fragments.
 *
 * <p>These helpers work on text with regular expressions; they are not an HTML parser.
 * Nested elements of the same name, {@code >} inside attribute values, comments and
 * CDATA sections are not understood. Do not rely on them as a security sanitizer.
 *
 * <p>Unless stated otherwise, {@code tag} / {@code property} arguments are inserted into
 * a regular expression verbatim, so regex metacharacters in them are interpreted as regex
 * syntax (for example {@code "h[1-6]"} matches any heading tag).
 */
public class HtmlUtil {
    /** Matches any tag: everything from a {@code <} up to the next {@code >}. */
    private static final Pattern HTML_TAG = Pattern.compile("<([^>]*)>");

    /** Matches one or more consecutive whitespace characters (including line breaks). */
    private static final Pattern WHITESPACE_RUN = Pattern.compile("\\s+");

    /**
     * Matches a {@code <style>} or {@code <script>} element, case-insensitively, from its
     * opening tag through the matching closing tag. When no closing tag follows, only the
     * opening tag is matched.
     */
    private static final Pattern STYLE_OR_SCRIPT = Pattern.compile(
            "<\\s*(style|script)\\b[^>]*>(?:[\\s\\S]*?<\\s*/\\s*\\1\\s*>)?",
            Pattern.CASE_INSENSITIVE);

    public HtmlUtil() {
    }

    /** Small manual demo: prints the result of {@link #filterHtml(String)} on a sample string. */
    public static void main(String[] args){
        String text="麻疹";
        System.out.println(HtmlUtil.filterHtml(text));
    }

    /**
     * Escapes the HTML special characters {@code <}, {@code >}, {@code "} and {@code &}
     * as {@code &lt;}, {@code &gt;}, {@code &quot;} and {@code &amp;}.
     *
     * @param input the text to escape; may be {@code null}
     * @return the escaped text, or {@code input} itself when it is {@code null}, empty or
     *         contains none of the four characters
     */
    public String replaceTag(String input) {
        if (!hasSpecialChars(input)) {
            return input;
        }
        // Entities are longer than the characters they replace, so leave some headroom.
        StringBuilder escaped = new StringBuilder(input.length() + 16);
        for (int i = 0; i < input.length(); i++) {
            char c = input.charAt(i);
            switch (c) {
                case '<':
                    escaped.append("&lt;");
                    break;
                case '>':
                    escaped.append("&gt;");
                    break;
                case '"':
                    escaped.append("&quot;");
                    break;
                case '&':
                    escaped.append("&amp;");
                    break;
                default:
                    escaped.append(c);
            }
        }
        return escaped.toString();
    }

    /**
     * Tells whether the text contains any of {@code <}, {@code >}, {@code "} or {@code &}.
     *
     * @param input the text to inspect; may be {@code null}
     * @return {@code true} if at least one of the four characters is present;
     *         {@code false} for {@code null} or empty input
     */
    public boolean hasSpecialChars(String input) {
        if (input == null) {
            return false;
        }
        for (int i = 0; i < input.length(); i++) {
            char c = input.charAt(i);
            if (c == '<' || c == '>' || c == '"' || c == '&') {
                // One hit is enough; no need to scan the rest.
                return true;
            }
        }
        return false;
    }

    /**
     * Removes every tag (any {@code <...>} span) from the text, keeping the text between tags.
     *
     * @param str the HTML fragment; may be {@code null}
     * @return the text with all tags removed, or {@code null} when {@code str} is {@code null}
     */
    public static String filterHtml(String str) {
        if (str == null) {
            return null;
        }
        return HTML_TAG.matcher(str).replaceAll("");
    }

    /**
     * Replaces every tag with a single space and then collapses each run of whitespace
     * (spaces, tabs, line breaks) into one space.
     *
     * @param str the HTML fragment; may be {@code null}
     * @return the flattened text, or {@code null} when {@code str} is {@code null}
     */
    public static String filterHtmlForTemplate(String str) {
        if (str == null) {
            return null;
        }
        String withoutTags = HTML_TAG.matcher(str).replaceAll(" ");
        // Only whitespace is collapsed; a literal '|' in the text is preserved.
        return WHITESPACE_RUN.matcher(withoutTags).replaceAll(" ");
    }

    /**
     * Removes opening tags of the given name that carry attributes.
     *
     * <p>Only tags with whitespace after the tag name are matched (for example
     * {@code <img src="a.png">} or {@code <br />}); a bare {@code <br>} and closing tags
     * are left in place.
     *
     * @param str the HTML fragment; may be {@code null}
     * @param tag the tag name, used as a regex fragment
     * @return the text without the matched tags, or {@code null} when {@code str} is {@code null}
     */
    public static String fiterHtmlTag(String str, String tag) {
        if (str == null) {
            return null;
        }
        return openingTagWithAttributes(tag).matcher(str).replaceAll("");
    }

    /**
     * Removes {@code <style>} and {@code <script>} elements, including their content,
     * regardless of the letter case of the tag name.
     *
     * <p>An opening {@code <style>}/{@code <script>} tag with no matching closing tag is
     * removed on its own.
     *
     * @param content the HTML fragment; may be {@code null}
     * @return the text without style/script elements, or {@code null} when {@code content}
     *         is {@code null}
     */
    public static String clearStyleOrScript(String content){
        if (content == null) {
            return null;
        }
        return STYLE_OR_SCRIPT.matcher(content).replaceAll("");
    }

    /**
     * Replaces each attributed opening tag named {@code beforeTag} with
     * {@code startTag + value + endTag}, where {@code value} is the double-quoted value of
     * the tag's {@code tagAttrib} attribute. Tags that lack the attribute are removed.
     *
     * <p>Only tags with whitespace after the tag name are matched. {@code startTag},
     * {@code endTag} and the attribute value are inserted literally.
     *
     * @param str       the HTML fragment; may be {@code null}
     * @param beforeTag name of the tag to replace, used as a regex fragment
     * @param tagAttrib name of the attribute whose value is kept, used as a regex fragment
     * @param startTag  text emitted before the attribute value
     * @param endTag    text emitted after the attribute value
     * @return the rewritten text, or {@code null} when {@code str} is {@code null}
     */
    public static String replaceHtmlTag(String str, String beforeTag,String tagAttrib, String startTag, String endTag) {
        if (str == null) {
            return null;
        }
        Pattern tagPattern = openingTagWithAttributes(beforeTag);
        // The attribute must start the attribute list or follow whitespace, so that
        // e.g. "src" does not match inside "data-src".
        Pattern attribPattern = Pattern.compile("(?:^|\\s)" + tagAttrib + "=\"([^\"]+)\"");
        Matcher tagMatcher = tagPattern.matcher(str);
        StringBuffer out = new StringBuffer();
        while (tagMatcher.find()) {
            Matcher attribMatcher = attribPattern.matcher(tagMatcher.group(1));
            String replacement = "";
            if (attribMatcher.find()) {
                replacement = startTag + attribMatcher.group(1) + endTag;
            }
            // Quote the replacement so '$' and '\' in attribute values are taken literally.
            tagMatcher.appendReplacement(out, Matcher.quoteReplacement(replacement));
        }
        tagMatcher.appendTail(out);
        return out.toString();
    }

    /**
     * Collects every attributed opening tag of the given name, in document order.
     *
     * <p>Only tags with whitespace after the tag name are matched.
     *
     * @param str the HTML fragment; may be {@code null}
     * @param tag the tag name, used as a regex fragment
     * @return the matched tags as they appear in {@code str}; empty when there are none
     *         or {@code str} is {@code null}
     */
    public static List<String> getHtmlTag(String str, String tag) {
        List<String> tags = new ArrayList<String>();
        if (str == null) {
            return tags;
        }
        Matcher matcher = openingTagWithAttributes(tag).matcher(str);
        while (matcher.find()) {
            tags.add(matcher.group());
        }
        return tags;
    }

    /**
     * Collects every element of the given name, from its opening tag through the nearest
     * following closing tag of the same name.
     *
     * <p>When no closing tag follows (for example a void element such as {@code <img>}),
     * only the opening tag is returned. Nested elements of the same name are not balanced.
     *
     * @param tag     the tag name, used as a regex fragment
     * @param content the HTML fragment; may be {@code null}
     * @return the matched elements in document order; empty when there are none or
     *         {@code content} is {@code null}
     */
    public static List<String> getTagContent(String tag,String content){
        List<String> elements = new ArrayList<String>();
        if (content == null) {
            return elements;
        }
        // The lookahead stops a short name such as "p" from matching "<pre>";
        // \1 makes the closing tag repeat exactly what the opening tag matched.
        String reg = "<\\s*?(" + tag + ")(?=[\\s/>])[^>]*>(?:[\\s\\S]*?<\\s*/\\s*\\1\\s*>)?";
        Matcher m = Pattern.compile(reg).matcher(content);
        while (m.find()) {
            elements.add(m.group());
        }
        return elements;
    }

    /**
     * Returns the value of an attribute on the first tag of the given name that has it.
     *
     * <p>The value may be double-quoted, single-quoted or unquoted; quotes are not part of
     * the result.
     *
     * @param tagName  the tag name, used as a regex fragment
     * @param property the attribute name, used as a regex fragment
     * @param content  the HTML fragment; may be {@code null}
     * @return the attribute value, or an empty string when no such tag/attribute is found
     *         or {@code content} is {@code null}
     */
    public static String getTagProperty(String tagName , String property,String content){
        if (content == null) {
            return "";
        }
        // (?=\s) keeps "a" from matching "<abbr"; the \s before the attribute name keeps
        // "src" from matching inside "data-src".
        Pattern p = Pattern.compile("<" + tagName + "(?=\\s)[^>]*?\\s" + property
                + "\\s*=\\s*(?:\"([^\"]*)\"|'([^']*)'|([^\\s\"'>]+))");
        Matcher m = p.matcher(content);
        if (m.find()) {
            // Exactly one of the three alternatives (double-quoted, single-quoted, bare) matched.
            for (int group = 1; group <= 3; group++) {
                if (m.group(group) != null) {
                    return m.group(group);
                }
            }
        }
        return "";
    }

    /**
     * Builds the pattern for an opening tag of the given name followed by whitespace and
     * an attribute section, which is captured as group 1.
     */
    private static Pattern openingTagWithAttributes(String tag) {
        return Pattern.compile("<\\s*" + tag + "\\s+([^>]*)\\s*>");
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy