// com.github.xiaoyuge5201.file.HtmlUtil (Maven / Gradle / Ivy)
package com.github.xiaoyuge5201.file;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Normalizes rich-text HTML fragments before they are exported to PDF.
 *
 * <p>The clean-up closes void tags ({@code <br>}, {@code <img>}), strips
 * {@code <font>} tags, removes attributes from {@code <span>} tags and drops
 * inline {@code font-family} declarations, because the PDF renderer fails to
 * display text carrying inline styles/fonts it does not support.
 *
 * <p>NOTE(review): the tag and entity literals in this class were reconstructed
 * from a copy of the source in which HTML markup had been stripped out of the
 * string literals; confirm them against the upstream artifact.
 */
public class HtmlUtil {

    /** Opening {@code <font ...>} tag, matched case-insensitively. */
    private static final Pattern FONT_OPEN_PATTERN = Pattern.compile("(?i)<font[^>]*>");

    /** Closing {@code </font>} tag, matched case-insensitively. */
    private static final Pattern FONT_CLOSE_PATTERN = Pattern.compile("(?i)</font>");

    /**
     * Opening {@code <span ...>} tag. Spans (e.g. inside {@code <li>}) must not
     * carry CSS, otherwise the PDF renderer cannot handle them.
     */
    private static final Pattern SPAN_PATTERN = Pattern.compile("(?i)<span[^>]*>");

    /** {@code <br ...>} tag in any form; it must not carry CSS and must be closed. */
    private static final Pattern BR_PATTERN = Pattern.compile("(?i)<br[^>]*>");

    /** {@code <img ...>} tag; it must be self-closed for the PDF renderer. */
    private static final Pattern IMG_PATTERN = Pattern.compile("(?i)<img[^>]*>");

    /**
     * Inline {@code font-family: ...;} declaration.
     * NOTE(review): the declaration is only matched up to the next semicolon, so a
     * declaration without a terminating {@code ;} is matched up to the next
     * semicolon that happens to follow it in the content.
     */
    private static final Pattern FONT_FAMILY_PATTERN = Pattern.compile("(?i)font-family[^\\;]*\\;");

    /** Non-breaking-space entity that is dropped together with font tags. */
    private static final String NBSP_ENTITY = "&nbsp;";

    /** Double-quote entity that is replaced by a single quote. */
    private static final String QUOT_ENTITY = "&quot;";

    // Stripping attributes from <li> tags was disabled (commented out) in the
    // original implementation and intentionally stays disabled here.

    /**
     * Matches any HTML-like tag.
     */
    String regForHtml = "<([^>]*)>";
    Pattern htmlPattern = Pattern.compile(regForHtml);

    /**
     * Closes {@code <br>} and {@code <img>} tags and removes font-related markup.
     *
     * <p>For PDF export, an inline {@code font-family} inside the text overrides
     * the font configured for the document; fonts the PDF renderer does not
     * support then cause the text not to be displayed at all, so that markup is
     * removed here.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>every {@code <br ...>} becomes {@code <br/>};</li>
     *   <li>every {@code <img ...>} that is not self-closed gets closed with
     *       {@code />} (each tag individually);</li>
     *   <li>opening {@code <font ...>} tags are removed; when at least one was
     *       present, {@code &nbsp;} entities are removed as well;</li>
     *   <li>{@code <span ...>} tags lose their attributes;</li>
     *   <li>{@code font-family...;} declarations are removed;</li>
     *   <li>{@code &quot;} becomes {@code '};</li>
     *   <li>closing {@code </font>} tags are removed; when at least one was
     *       present, {@code &nbsp;} entities are removed as well.</li>
     * </ol>
     *
     * @param content the HTML fragment to clean; may be {@code null}
     * @return the cleaned fragment; {@code null} or empty input is returned unchanged
     */
    public String dealHtmlContent(String content) {
        if (content == null || content.isEmpty()) {
            return content;
        }

        // 1. Close br tags, dropping any attributes/styles they carry.
        String result = BR_PATTERN.matcher(content).replaceAll("<br/>");

        // 2. Close img tags one by one so each keeps its own attributes.
        result = closeImgTags(result);

        // 3. Remove opening font tags (and &nbsp; only when font markup was present).
        if (FONT_OPEN_PATTERN.matcher(result).find()) {
            result = FONT_OPEN_PATTERN.matcher(result).replaceAll("").replace(NBSP_ENTITY, "");
        }

        // 4. Strip attributes from span tags.
        result = SPAN_PATTERN.matcher(result).replaceAll("<span>");

        // 5. Remove inline font-family declarations.
        result = FONT_FAMILY_PATTERN.matcher(result).replaceAll("");

        // 6. Replace the double-quote entity with a single quote.
        result = result.replace(QUOT_ENTITY, "'");

        // 7. Remove closing font tags (and &nbsp; only when one was present).
        if (FONT_CLOSE_PATTERN.matcher(result).find()) {
            result = FONT_CLOSE_PATTERN.matcher(result).replaceAll("").replace(NBSP_ENTITY, "");
        }
        return result;
    }

    /**
     * Rewrites every {@code <img ...>} tag that does not already end in
     * {@code />} so that it does; already self-closed tags are left untouched.
     *
     * @param content non-null HTML fragment
     * @return the fragment with all img tags self-closed
     */
    private static String closeImgTags(String content) {
        Matcher imgMatcher = IMG_PATTERN.matcher(content);
        StringBuffer out = new StringBuffer(content.length() + 16);
        while (imgMatcher.find()) {
            String tag = imgMatcher.group();
            String closed = tag.endsWith("/>")
                    ? tag
                    : tag.substring(0, tag.length() - 1) + "/>";
            // quoteReplacement: '$' and '\' in attribute values must be taken literally.
            imgMatcher.appendReplacement(out, Matcher.quoteReplacement(closed));
        }
        imgMatcher.appendTail(out);
        return out.toString();
    }

    /**
     * Tells whether the string contains an HTML-like tag.
     *
     * @param str the string to inspect; may be {@code null}
     * @return {@code true} if {@code str} contains text of the form {@code <...>},
     *         {@code false} otherwise or when {@code str} is {@code null}
     */
    private boolean judgeStrContainsHtmlTag(String str) {
        return str != null && htmlPattern.matcher(str).find();
    }
}