// cn.enilu.flash.web.HtmlCleaner (Maven / Gradle / Ivy)
package cn.enilu.flash.web;
import cn.enilu.flash.core.lang.Strings;
import java.util.Locale;
import java.util.regex.Pattern;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.safety.Cleaner;
import org.jsoup.safety.Whitelist;
//see http://jsoup.org/cookbook/cleaning-html/whitelist-sanitizer
public class HtmlCleaner {
public String clean(String html) {
if (Strings.isBlank(html)) {
return "";
}
Document doc = Jsoup.parse(html);
Whitelist whitelist = Whitelist.basicWithImages();
whitelist.addAttributes("span","style");
whitelist.addAttributes("font","color");
whitelist.addAttributes("p","style");
Cleaner cleaner = new Cleaner(whitelist);
doc = cleaner.clean(doc);
for (Element e : doc.select("[style]")) {
String style = e.attr("style");
style = cleanStyle(style);
if (style.isEmpty()) {
e.removeAttr("style");
} else {
e.attr("style", style);
}
}
return doc.select("body").html();
}
private String cleanStyle(String style) {
if (style == null) {
return "";
}
style = style.toLowerCase();
//disable javascript/expression
if (style.contains("javascript") || style.contains("expression")) {
return "";
}
// /*xxx*/
style = style.replaceAll("\\*", "");
// x
style = style.replaceAll("&", "");
return style;
}
public static void main(String[] args){
String test = "\n" +
" 来火球社区签到有什么好处呢?今天是圣诞节,有礼品发给我们火星人吗?\n" +
"
";
System.out.println(new HtmlCleaner().clean(test));
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy