All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.tmsps.ne4spring.utils.HtmlUtil Maven / Gradle / Ivy

There is a newer version: 999.0.0.0
Show newest version
package com.tmsps.ne4spring.utils;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Static helpers for stripping HTML markup from a string and for escaping /
 * unescaping the four basic HTML special characters
 * ({@code &}, {@code <}, {@code >}, {@code "}).
 *
 * <p>All methods are stateless; the regular expressions are compiled once and
 * shared, which is safe because {@link Pattern} instances are immutable.
 *
 * <p>NOTE: the tag stripping here is regex based and is meant for producing
 * plain-text previews. It is not an HTML sanitizer and must not be relied on
 * as a defence against script injection.
 *
 * @author zhangwei [email protected]
 *
 */
public class HtmlUtil {

	/** A complete {@code <script ...>...</script>} element, body included. */
	private static final Pattern SCRIPT_ELEMENT = Pattern.compile(
			"<\\s*script\\b[^>]*>[\\s\\S]*?<\\s*/\\s*script\\s*>", Pattern.CASE_INSENSITIVE);

	/** A complete {@code <style ...>...</style>} element, body included. */
	private static final Pattern STYLE_ELEMENT = Pattern.compile(
			"<\\s*style\\b[^>]*>[\\s\\S]*?<\\s*/\\s*style\\s*>", Pattern.CASE_INSENSITIVE);

	/** Any properly closed tag, e.g. {@code <p>} or {@code </div>}. */
	private static final Pattern COMPLETE_TAG = Pattern.compile("<[^>]+>");

	/** A tag that was opened but never closed (typically truncated input). */
	private static final Pattern UNTERMINATED_TAG = Pattern.compile("<[^>]+");

	/** A tag on a single line, or the non-breaking-space entity. */
	private static final Pattern TAG_OR_NBSP = Pattern.compile("<.*?>|&nbsp;");

	/**
	 * Converts an HTML fragment to plain text.
	 *
	 * <p>Script and style elements are removed together with their content,
	 * then every remaining tag is removed, and finally a dangling, unclosed tag
	 * (such as {@code "<div class=..."} left by truncation) is dropped.
	 * Character entities are left untouched.
	 *
	 * @param inputString the string that may contain HTML markup; may be {@code null}
	 * @return the text with markup removed, or {@code ""} when the input is
	 *         {@code null} or empty
	 */
	public static String html2Text(String inputString) {
		if (inputString == null || inputString.isEmpty()) {
			return "";
		}
		// Order matters: script/style must go first so that their bodies are
		// removed as a whole rather than surviving once the tags are stripped.
		String text = SCRIPT_ELEMENT.matcher(inputString).replaceAll("");
		text = STYLE_ELEMENT.matcher(text).replaceAll("");
		text = COMPLETE_TAG.matcher(text).replaceAll("");
		return UNTERMINATED_TAG.matcher(text).replaceAll("");
	}

	/**
	 * Escapes the HTML special characters {@code &}, {@code <}, {@code >} and
	 * {@code "} as {@code &amp;}, {@code &lt;}, {@code &gt;} and {@code &quot;}.
	 *
	 * @param str the raw text; may be {@code null}
	 * @return the escaped text, or {@code ""} when {@code str} is {@code null}
	 */
	public static String htmlspecialchars(String str) {
		if (str == null) {
			return "";
		}
		// The ampersand is escaped first; doing it later would re-escape the
		// ampersands introduced by the other replacements.
		return str.replace("&", "&amp;")
				.replace("<", "&lt;")
				.replace(">", "&gt;")
				.replace("\"", "&quot;");
	}

	/**
	 * Reverses {@link #htmlspecialchars(String)}: turns {@code &lt;},
	 * {@code &gt;}, {@code &quot;} and {@code &amp;} back into the characters
	 * they stand for.
	 *
	 * @param str the escaped text; may be {@code null}
	 * @return the unescaped text, or {@code ""} when {@code str} is {@code null}
	 */
	public static String htmlbackspecialchars(String str) {
		if (str == null) {
			return "";
		}
		// The ampersand is decoded last so that "&amp;lt;" becomes "&lt;"
		// and is not decoded a second time into "<".
		return str.replace("&lt;", "<")
				.replace("&gt;", ">")
				.replace("&quot;", "\"")
				.replace("&amp;", "&");
	}

	/**
	 * Removes HTML tags and {@code &nbsp;} entities from a string.
	 *
	 * <p>Unlike {@link #html2Text(String)} this keeps the content of script
	 * and style elements, and only recognises tags that open and close on the
	 * same line.
	 *
	 * @param str the string to filter; may be {@code null}
	 * @return the filtered string, or {@code ""} when {@code str} is {@code null}
	 */
	public static String filterHtml(String str) {
		if (str == null) {
			return "";
		}
		return TAG_OR_NBSP.matcher(str).replaceAll("");
	}

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy