All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.msl.pdfier.commons.html.HTMLTidier Maven / Gradle / Ivy

There is a newer version: 9.1.20
Show newest version
package com.msl.pdfier.commons.html;

import java.net.URL;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.msl.pdfier.commons.Constants;
import com.msl.pdfier.commons.exception.PdfierException;
import com.msl.pdfier.commons.http.HTTPClient;

/**
 * Static entry points that run HTML through the project's clean-up helpers
 * ({@link HTMLSanitizer} and {@link HTMLPrintableUtil}) and return the
 * resulting markup.
 *
 * <p>Every overload logs any failure at error level and rethrows it wrapped
 * in a {@link PdfierException} that keeps the original cause.
 */
public class HTMLTidier {

	protected static final Logger logger = LoggerFactory.getLogger(HTMLTidier.class);

	/** Message shared by the error log entry and the wrapping exception. */
	private static final String TIDY_ERROR_MESSAGE = "Error tidying HTML";

	/**
	 * Downloads the document at {@code sourceUrl} and tidies it.
	 *
	 * <p>The download is best effort: if it fails for any reason (including a
	 * {@code null} URL) a warning is logged and the pipeline continues with an
	 * empty string. Unlike the overloads that take the HTML as a string, this
	 * one does not call {@code addMandatoryHtml} and additionally calls
	 * {@code addCDATAToHeadStyleTags} as its last step.
	 *
	 * @param sourceUrl location of the HTML document; also handed to
	 *                  {@code addExternalInlineStyleSheets}
	 * @return the tidied HTML
	 * @throws PdfierException if any step after the download fails
	 */
	public static String getHTMLTidied(URL sourceUrl) throws PdfierException {
		try {
			String htmlTidied = "";
			try {
				htmlTidied = HTTPClient.readUrlForPdf(sourceUrl.toURI());
			} catch (Exception ex) {
				// Deliberately not rethrown: keep going with empty HTML, but log
				// the real operation that failed together with its stack trace.
				logger.warn("Error reading HTML from {}", sourceUrl, ex);
			}
			htmlTidied = HTMLSanitizer.stripInvalidMarkup(htmlTidied, Constants.HTML_ELEMENTS_TO_STRIP);
			htmlTidied = HTMLPrintableUtil.moveStyleToHead(htmlTidied);
			htmlTidied = HTMLPrintableUtil.addExternalInlineStyleSheets(sourceUrl, htmlTidied);
			htmlTidied = HTMLPrintableUtil.addCDATAToHeadStyleTags(htmlTidied);
			return htmlTidied;
		} catch (Exception e) {
			logger.error(TIDY_ERROR_MESSAGE, e);
			throw new PdfierException(TIDY_ERROR_MESSAGE, e);
		}
	}

	/**
	 * Tidies HTML that the caller already holds, passing {@code requestUrl} to
	 * {@code addExternalInlineStyleSheets}.
	 *
	 * @param requestUrl URL handed to {@code addExternalInlineStyleSheets}
	 *                   (presumably the base for resolving external style
	 *                   sheets; confirm against that helper)
	 * @param inputHTML  the markup to tidy
	 * @return the tidied HTML
	 * @throws PdfierException if any step fails
	 */
	public static String getHTMLTidied(URL requestUrl, String inputHTML) throws PdfierException {
		try {
			String htmlTidied = HTMLSanitizer.stripInvalidMarkup(inputHTML, Constants.HTML_ELEMENTS_TO_STRIP);
			htmlTidied = HTMLPrintableUtil.addMandatoryHtml(htmlTidied);
			htmlTidied = HTMLPrintableUtil.moveStyleToHead(htmlTidied);
			htmlTidied = HTMLPrintableUtil.addExternalInlineStyleSheets(requestUrl, htmlTidied);
			return htmlTidied;
		} catch (Exception e) {
			logger.error(TIDY_ERROR_MESSAGE, e);
			throw new PdfierException(TIDY_ERROR_MESSAGE, e);
		}
	}

	/**
	 * Tidies HTML that the caller already holds. Unlike the overloads that
	 * take a URL, this one does not call {@code addExternalInlineStyleSheets}.
	 *
	 * @param inputHTML the markup to tidy
	 * @return the tidied HTML
	 * @throws PdfierException if any step fails
	 */
	public static String getHTMLTidied(String inputHTML) throws PdfierException {
		try {
			String htmlTidied = HTMLSanitizer.stripInvalidMarkup(inputHTML, Constants.HTML_ELEMENTS_TO_STRIP);
			htmlTidied = HTMLPrintableUtil.addMandatoryHtml(htmlTidied);
			htmlTidied = HTMLPrintableUtil.moveStyleToHead(htmlTidied);
			return htmlTidied;
		} catch (Exception e) {
			logger.error(TIDY_ERROR_MESSAGE, e);
			throw new PdfierException(TIDY_ERROR_MESSAGE, e);
		}
	}

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy