All downloads are free. Search and download functionality uses the official Maven repository.

com.github.xiaoyuge5201.file.HtmlUtil Maven / Gradle / Ivy

There is a newer version: 1.3.5
Show newest version
package com.github.xiaoyuge5201.file;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Cleans up rich-text HTML fragments before they are rendered to PDF.
 *
 * <p>According to the original comments, the PDF renderer cannot cope with inline CSS on
 * certain elements and needs void elements such as {@code <br>} and {@code <img>} to be
 * self-closed. This class strips or normalizes those constructs with regular expressions.
 */
public class HtmlUtil {
    /** Matches an opening {@code <font ...>} tag, case-insensitively. */
    private static final Pattern FONT_OPEN_PATTERN = Pattern.compile("(?i)<font[^>]*>");

    /** Matches a closing {@code </font>} tag, case-insensitively. */
    private static final Pattern FONT_CLOSE_PATTERN = Pattern.compile("(?i)</font>");

    /**
     * Matches an opening {@code <span ...>} tag. Spans must not carry CSS, otherwise the PDF
     * renderer does not recognize them.
     */
    private static final Pattern SPAN_PATTERN = Pattern.compile("(?i)<span[^>]*>");

    /**
     * Matches a {@code <br ...>} tag (closed or not). Line breaks must not carry CSS,
     * otherwise the PDF renderer does not recognize them.
     */
    private static final Pattern BR_PATTERN = Pattern.compile("(?i)<br[^>]*>");

    /** Matches an {@code <img ...>} tag; image tags need to be self-closed. */
    private static final Pattern IMG_PATTERN = Pattern.compile("(?i)<img[^>]*>");

    /** Matches an inline {@code font-family: ...;} declaration up to and including the semicolon. */
    private static final Pattern FONT_FAMILY_PATTERN = Pattern.compile("(?i)font-family[^;]*;");

    // Stripping styles from <li> tags was disabled (commented out) in the original and is
    // intentionally not performed here.

    /**
     * Regex for "contains an HTML tag". Kept as package-private instance fields so that any
     * same-package code reading them keeps working.
     */
    String regForHtml = "<([^>]*)>";
    Pattern htmlPattern = Pattern.compile(regForHtml);

    /**
     * Normalizes an HTML fragment for PDF export.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>every {@code <br ...>} becomes {@code <br/>} (attributes dropped);</li>
     *   <li>every {@code <img ...>} that is not already self-closed gets a closing {@code /};</li>
     *   <li>{@code <font ...>} and {@code </font>} tags are removed, keeping their content;</li>
     *   <li>every {@code <span ...>} becomes a bare {@code <span>};</li>
     *   <li>inline {@code font-family: ...;} declarations are removed;</li>
     *   <li>{@code &quot;} entities are replaced by a single quote.</li>
     * </ol>
     *
     * <p>Rationale from the original author: when a font is configured for the document as a
     * whole, text carrying its own {@code font-family} is not displayed, because the PDF
     * renderer prefers the inline style but has compatibility problems with other fonts.
     *
     * @param content the HTML fragment; may be {@code null} or empty
     * @return the normalized fragment, or {@code content} itself when it is {@code null} or empty
     */
    public String dealHtmlContent(String content) {
        if (content == null || content.isEmpty()) {
            return content;
        }

        // 1. Close <br> tags, dropping any style they carry.
        String result = BR_PATTERN.matcher(content).replaceAll("<br/>");

        // 2. Self-close each <img> tag individually (a single global replace would overwrite
        //    every image with the first one).
        result = closeImgTags(result);

        // 3. Drop <font> wrappers but keep what they enclose.
        result = FONT_OPEN_PATTERN.matcher(result).replaceAll("");
        result = FONT_CLOSE_PATTERN.matcher(result).replaceAll("");

        // 4. Strip attributes (i.e. styles) from <span> tags.
        result = SPAN_PATTERN.matcher(result).replaceAll("<span>");

        // 5. Remove inline font-family declarations.
        result = FONT_FAMILY_PATTERN.matcher(result).replaceAll("");

        // 6. Replace the double-quote entity with a single quote.
        result = result.replace("&quot;", "'");

        // NOTE(review): the original ended with one more contains/replaceAll step that removed
        // an opening/closing tag pair. The tag names are not recoverable from this copy of the
        // file, so that step is not reproduced here - restore it from the upstream source.
        return result;
    }

    /**
     * Rewrites every {@code <img ...>} tag that does not already end in {@code />} so that it does.
     *
     * @param html the markup to process; must not be {@code null}
     * @return the markup with all image tags self-closed
     */
    private static String closeImgTags(String html) {
        Matcher matcher = IMG_PATTERN.matcher(html);
        // StringBuffer (not StringBuilder) keeps this compatible with Java 8's Matcher API.
        StringBuffer out = new StringBuffer(html.length() + 16);
        while (matcher.find()) {
            String tag = matcher.group();
            if (!tag.endsWith("/>")) {
                // The pattern guarantees exactly one '>' and that it is the last character.
                tag = tag.substring(0, tag.length() - 1) + "/>";
            }
            // quoteReplacement: attribute values may contain '$' or '\\'.
            matcher.appendReplacement(out, Matcher.quoteReplacement(tag));
        }
        matcher.appendTail(out);
        return out.toString();
    }

    /**
     * Tells whether a string contains anything that looks like an HTML tag.
     *
     * @param str the string to inspect
     * @return {@code true} if {@code str} contains a {@code <...>} sequence
     */
    private boolean judgeStrContainsHtmlTag(String str) {
        return htmlPattern.matcher(str).find();
    }
}




  • © 2015 - 2024 Weber Informatics LLC | Privacy Policy