
// com.itextpdf.tool.xml.XMLWorkerHelper Maven / Gradle / Ivy
/*
*
* This file is part of the iText (R) project.
Copyright (c) 1998-2022 iText Group NV
* Authors: Balder Van Camp, Emiel Ackermann, et al.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License version 3
* as published by the Free Software Foundation with the addition of the
* following permission added to Section 15 as permitted in Section 7(a):
* FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
* ITEXT GROUP. ITEXT GROUP DISCLAIMS THE WARRANTY OF NON INFRINGEMENT
* OF THIRD PARTY RIGHTS.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
* details. You should have received a copy of the GNU Affero General Public
* License along with this program; if not, see http://www.gnu.org/licenses or
* write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA, 02110-1301 USA, or download the license from the following URL:
* http://itextpdf.com/terms-of-use/
*
* The interactive user interfaces in modified source and object code versions
* of this program must display Appropriate Legal Notices, as required under
* Section 5 of the GNU Affero General Public License.
*
* In accordance with Section 7(b) of the GNU Affero General Public License, a
* covered work must retain the producer line in every PDF that is created or
* manipulated using iText.
*
* You can be released from the requirements of the license by purchasing a
* commercial license. Buying such a license is mandatory as soon as you develop
* commercial activities involving the iText software without disclosing the
* source code of your own applications. These activities include: offering paid
* services to customers as an ASP, serving PDFs on the fly in a web
* application, shipping iText with a closed source product.
*
* For more information, please contact iText Software Corp. at this address:
* sales@itextpdf.com
*/
package com.itextpdf.tool.xml;
import com.itextpdf.text.Document;
import com.itextpdf.text.Element;
import com.itextpdf.text.FontFactory;
import com.itextpdf.text.FontProvider;
import com.itextpdf.text.pdf.PdfWriter;
import com.itextpdf.tool.xml.css.*;
import com.itextpdf.tool.xml.exceptions.RuntimeWorkerException;
import com.itextpdf.tool.xml.html.CssAppliers;
import com.itextpdf.tool.xml.html.CssAppliersImpl;
import com.itextpdf.tool.xml.html.TagProcessor;
import com.itextpdf.tool.xml.html.TagProcessorFactory;
import com.itextpdf.tool.xml.html.Tags;
import com.itextpdf.tool.xml.parser.XMLParser;
import com.itextpdf.tool.xml.pipeline.css.CSSResolver;
import com.itextpdf.tool.xml.pipeline.css.CssResolverPipeline;
import com.itextpdf.tool.xml.pipeline.end.ElementHandlerPipeline;
import com.itextpdf.tool.xml.pipeline.end.PdfWriterPipeline;
import com.itextpdf.tool.xml.pipeline.html.HtmlPipeline;
import com.itextpdf.tool.xml.pipeline.html.HtmlPipelineContext;
import java.io.*;
import java.nio.charset.Charset;
/**
* A helper class for parsing XHTML/CSS or XML flow to PDF.
*
* @author redlab_b
*
*/
public class XMLWorkerHelper {

    /** Classpath location of the style sheet that is used when the caller supplies none. */
    private static final String DEFAULT_CSS_RESOURCE = "/default.css";

    /** The one and only instance; the constructor is private. */
    private static final XMLWorkerHelper myself = new XMLWorkerHelper();

    /** Lazily initialised by {@link #getDefaultTagProcessorFactory()}. */
    private TagProcessorFactory tpf;

    /** Lazily initialised by {@link #getDefaultCSS()}. */
    private CssFile defaultCssFile;

    /**
     * Not instantiable from outside; use {@link #getInstance()}.
     */
    private XMLWorkerHelper() {
    }

    /**
     * Get a Singleton XMLWorkerHelper
     *
     * @return a singleton instance of XMLWorkerHelper
     */
    public synchronized static XMLWorkerHelper getInstance() {
        return myself;
    }

    /**
     * Parses the CSS available on the given stream. The stream is decoded
     * with the platform default charset and is always closed before this
     * method returns.
     *
     * @param in the stream to read the CSS from, may be <code>null</code>
     * @return the parsed {@link CssFile}, or <code>null</code> if
     *         <code>in</code> is <code>null</code>
     * @throws RuntimeWorkerException if reading or closing the stream fails
     */
    public static synchronized CssFile getCSS(InputStream in) {
        if (null == in) {
            return null;
        }
        return readCss(new InputStreamReader(in));
    }

    /**
     * Returns the default style sheet. It is loaded from the class path
     * resource <code>/default.css</code> on the first call and cached in
     * this instance for subsequent calls.
     *
     * @return the default css file.
     */
    public synchronized CssFile getDefaultCSS() {
        if (null == defaultCssFile) {
            defaultCssFile = getCSS(XMLWorkerHelper.class.getResourceAsStream(DEFAULT_CSS_RESOURCE));
        }
        return defaultCssFile;
    }

    /**
     * Parses the xml data in the given reader and sends created {@link Element}
     * s to the defined ElementHandler.
     * This method configures the XMLWorker and XMLParser to parse (X)HTML/CSS
     * and accept unknown tags.
     *
     * @param d the handler
     * @param in the reader
     * @throws IOException thrown when something went wrong with the IO
     */
    public void parseXHtml(final ElementHandler d, final Reader in) throws IOException {
        StyleAttrCSSResolver cssResolver = newCssResolver(getDefaultCSS());
        HtmlPipelineContext hpc = newHtmlContext(null);
        Pipeline<?> pipeline = new CssResolverPipeline(cssResolver,
                new HtmlPipeline(hpc, new ElementHandlerPipeline(d, null)));
        XMLWorker worker = new XMLWorker(pipeline, true);
        XMLParser p = new XMLParser();
        p.addListener(worker);
        p.parse(in);
    }

    /**
     * Parses the xml data. This method configures the XMLWorker to parse
     * (X)HTML/CSS and accept unknown tags. Writes the output in the given
     * PdfWriter with the given document.
     *
     * @param writer the PdfWriter
     * @param doc the Document
     * @param in the reader
     * @throws IOException thrown when something went wrong with the IO
     */
    public void parseXHtml(final PdfWriter writer, final Document doc, final Reader in) throws IOException {
        StyleAttrCSSResolver cssResolver = newCssResolver(getDefaultCSS());
        HtmlPipelineContext hpc = newHtmlContext(null);
        Pipeline<?> pipeline = new CssResolverPipeline(cssResolver,
                new HtmlPipeline(hpc, new PdfWriterPipeline(doc, writer)));
        XMLWorker worker = new XMLWorker(pipeline, true);
        XMLParser p = new XMLParser();
        p.addListener(worker);
        p.parse(in);
    }

    /**
     * Parses XHTML using the <code>/default.css</code> resource, no explicit
     * charset and a new {@link XMLWorkerFontProvider}.
     *
     * @param writer the writer to use
     * @param doc the document to use
     * @param in the {@link InputStream} of the XHTML source.
     * @throws IOException if the {@link InputStream} could not be read.
     */
    public void parseXHtml(final PdfWriter writer, final Document doc, final InputStream in) throws IOException {
        parseXHtml(writer, doc, in, XMLWorkerHelper.class.getResourceAsStream(DEFAULT_CSS_RESOURCE), null,
                new XMLWorkerFontProvider());
    }

    /**
     * Parses XHTML using the <code>/default.css</code> resource.
     *
     * @param writer the writer to use
     * @param doc the document to use
     * @param in the {@link InputStream} of the XHTML source.
     * @param charset the charset to use
     * @param fontProvider the {@link FontProvider} handed to the {@link CssAppliersImpl}
     * @throws IOException if the {@link InputStream} could not be read.
     */
    public void parseXHtml(PdfWriter writer, Document doc, InputStream in, Charset charset,
            final FontProvider fontProvider) throws IOException {
        this.parseXHtml(writer, doc, in, XMLWorkerHelper.class.getResourceAsStream(DEFAULT_CSS_RESOURCE), charset,
                fontProvider);
    }

    /**
     * Parses XHTML using the <code>/default.css</code> resource and a new
     * {@link XMLWorkerFontProvider}.
     *
     * @param writer the writer to use
     * @param doc the document to use
     * @param in the {@link InputStream} of the XHTML source.
     * @param charset the charset to use
     * @throws IOException if the {@link InputStream} could not be read.
     */
    public void parseXHtml(final PdfWriter writer, final Document doc, final InputStream in, final Charset charset)
            throws IOException {
        parseXHtml(writer, doc, in, XMLWorkerHelper.class.getResourceAsStream(DEFAULT_CSS_RESOURCE), charset);
    }

    /**
     * Parses XHTML without a resources root path.
     *
     * @param writer the writer to use
     * @param doc the document to use
     * @param in the {@link InputStream} of the XHTML source.
     * @param inCssFile the {@link InputStream} of the css file; when
     *            <code>null</code> the default css is used
     * @param charset the charset to use
     * @param fontProvider the {@link FontProvider} handed to the {@link CssAppliersImpl}
     * @throws IOException if the {@link InputStream} could not be read.
     */
    public void parseXHtml(final PdfWriter writer, final Document doc, final InputStream in,
            final InputStream inCssFile, final Charset charset, final FontProvider fontProvider) throws IOException {
        parseXHtml(writer, doc, in, inCssFile, charset, fontProvider, null);
    }

    /**
     * Parses XHTML and writes the result to the given document through the
     * given writer. All other stream based <code>parseXHtml</code> overloads
     * taking a {@link PdfWriter} end up here.
     *
     * @param writer the writer to use
     * @param doc the document to use
     * @param in the {@link InputStream} of the XHTML source.
     * @param inCssFile the {@link InputStream} of the css file; when
     *            <code>null</code> the default css is used
     * @param charset the charset to use; when <code>null</code> the parser is
     *            asked to parse the stream without an explicit charset
     * @param fontProvider the {@link FontProvider} handed to the {@link CssAppliersImpl}
     * @param resourcesRootPath defines the root path to find resources in case they are defined in html with relative paths (e.g. images)
     * @throws IOException if the {@link InputStream} could not be read.
     */
    public void parseXHtml(final PdfWriter writer, final Document doc, final InputStream in,
            final InputStream inCssFile, final Charset charset, final FontProvider fontProvider,
            final String resourcesRootPath) throws IOException {
        CssFile cssFile;
        if (inCssFile != null) {
            cssFile = getCSS(inCssFile);
        } else {
            cssFile = getDefaultCSS();
        }
        StyleAttrCSSResolver cssResolver = newCssResolver(cssFile);
        HtmlPipelineContext hpc = newHtmlContext(new CssAppliersImpl(fontProvider));
        hpc.setResourcesRootPath(resourcesRootPath);
        HtmlPipeline htmlPipeline = new HtmlPipeline(hpc, new PdfWriterPipeline(doc, writer));
        Pipeline<?> pipeline = new CssResolverPipeline(cssResolver, htmlPipeline);
        XMLWorker worker = new XMLWorker(pipeline, true);
        XMLParser p = new XMLParser(true, worker, charset);
        if (charset != null) {
            p.parse(in, charset);
        } else {
            p.parse(in);
        }
    }

    /**
     * Parses XHTML with the given css, no explicit charset and a new
     * {@link XMLWorkerFontProvider}.
     *
     * @param writer the writer to use
     * @param doc the document to use
     * @param in the {@link InputStream} of the XHTML source.
     * @param inCssFile the {@link InputStream} of the css file; when
     *            <code>null</code> the default css is used
     * @throws IOException if the {@link InputStream} could not be read.
     */
    public void parseXHtml(final PdfWriter writer, final Document doc, final InputStream in,
            final InputStream inCssFile) throws IOException {
        parseXHtml(writer, doc, in, inCssFile, null, new XMLWorkerFontProvider());
    }

    /**
     * Parses XHTML with the given css and font provider and no explicit
     * charset.
     *
     * @param writer the writer to use
     * @param doc the document to use
     * @param in the {@link InputStream} of the XHTML source.
     * @param inCssFile the {@link InputStream} of the css file; when
     *            <code>null</code> the default css is used
     * @param fontProvider the {@link FontProvider} handed to the {@link CssAppliersImpl}
     * @throws IOException if the {@link InputStream} could not be read.
     */
    public void parseXHtml(final PdfWriter writer, final Document doc, final InputStream in,
            final InputStream inCssFile, final FontProvider fontProvider) throws IOException {
        parseXHtml(writer, doc, in, inCssFile, null, fontProvider);
    }

    /**
     * Parses XHTML with the given css and charset and a new
     * {@link XMLWorkerFontProvider}.
     *
     * @param writer the writer to use
     * @param doc the document to use
     * @param in the {@link InputStream} of the XHTML source.
     * @param inCssFile the {@link InputStream} of the css file; when
     *            <code>null</code> the default css is used
     * @param charset the charset to use
     * @throws IOException if the {@link InputStream} could not be read.
     */
    public void parseXHtml(final PdfWriter writer, final Document doc, final InputStream in,
            final InputStream inCssFile, final Charset charset) throws IOException {
        parseXHtml(writer, doc, in, inCssFile, charset, new XMLWorkerFontProvider());
    }

    /**
     * Parses the XHTML on the given stream with the default css and sends
     * the created elements to the given handler.
     *
     * @param d the ElementHandler
     * @param in the InputStream
     * @param charset the charset to use; when <code>null</code> the parser is
     *            asked to parse the stream without an explicit charset
     * @throws IOException if something went seriously wrong with IO.
     */
    public void parseXHtml(final ElementHandler d, final InputStream in, final Charset charset) throws IOException {
        StyleAttrCSSResolver cssResolver = newCssResolver(getDefaultCSS());
        HtmlPipelineContext hpc = newHtmlContext(null);
        Pipeline<?> pipeline = new CssResolverPipeline(cssResolver,
                new HtmlPipeline(hpc, new ElementHandlerPipeline(d, null)));
        XMLWorker worker = new XMLWorker(pipeline, true);
        XMLParser p = new XMLParser(true, worker, charset);
        if (charset != null) {
            p.parse(in, charset);
        } else {
            p.parse(in);
        }
    }

    /**
     * Get a CSSResolver implementation.
     *
     * @param addDefaultCss true if the defaultCss should already be added.
     * @return the default CSSResolver
     *
     */
    public CSSResolver getDefaultCssResolver(final boolean addDefaultCss) {
        CSSResolver resolver = new StyleAttrCSSResolver();
        if (addDefaultCss) {
            resolver.addCss(getDefaultCSS());
        }
        return resolver;
    }

    /**
     * Retrieves the default factory for processing HTML tags from
     * {@link Tags#getHtmlTagProcessorFactory()}. On subsequent calls the same
     * {@link TagProcessorFactory} is returned every time.
     * @return a
     * DefaultTagProcessorFactory that maps HTML tags to {@link TagProcessor}s
     */
    protected synchronized TagProcessorFactory getDefaultTagProcessorFactory() {
        if (null == tpf) {
            tpf = Tags.getHtmlTagProcessorFactory();
        }
        return tpf;
    }

    /**
     * Parses an HTML string and a string containing CSS into a list of Element objects.
     * The FontProvider will be obtained from iText's FontFactory object.
     * <p>
     * Both strings are read character by character through a
     * {@link StringReader}. They are deliberately not converted to bytes
     * first: encoding with the platform default charset would replace every
     * character that charset cannot represent.
     *
     * @param html a String containing an XHTML snippet
     * @param css a String containing CSS, may be <code>null</code>
     * @return an ElementList instance
     * @throws IOException if the parser fails to read the html
     */
    public static ElementList parseToElementList(String html, String css) throws IOException {
        // CSS
        CSSResolver cssResolver = new StyleAttrCSSResolver();
        if (css != null) {
            cssResolver.addCss(readCss(new StringReader(css)));
        }

        // HTML
        CssAppliers cssAppliers = new CssAppliersImpl(FontFactory.getFontImp());
        HtmlPipelineContext htmlContext = new HtmlPipelineContext(cssAppliers);
        htmlContext.setTagFactory(Tags.getHtmlTagProcessorFactory());
        htmlContext.autoBookmark(false);

        // Pipelines
        ElementList elements = new ElementList();
        ElementHandlerPipeline end = new ElementHandlerPipeline(elements, null);
        HtmlPipeline htmlPipeline = new HtmlPipeline(htmlContext, end);
        CssResolverPipeline cssPipeline = new CssResolverPipeline(cssResolver, htmlPipeline);

        // XML Worker
        XMLWorker worker = new XMLWorker(cssPipeline, true);
        XMLParser p = new XMLParser(worker);
        p.parse(new StringReader(html));
        return elements;
    }

    /**
     * Feeds every character of the given reader to a new
     * {@link CssFileProcessor} and wraps the result. The reader is always
     * closed. Synchronized on the class, like {@link #getCSS(InputStream)}.
     *
     * @param source the characters of the style sheet, never <code>null</code>
     * @return the parsed css
     * @throws RuntimeWorkerException if reading or closing the reader fails
     */
    private static synchronized CssFile readCss(final Reader source) {
        final CssFileProcessor cssFileProcessor = new CssFileProcessor();
        final BufferedReader br = new BufferedReader(source);
        try {
            char[] buffer = new char[8192];
            int length;
            while ((length = br.read(buffer)) > 0) {
                for (int i = 0; i < length; i++) {
                    cssFileProcessor.process(buffer[i]);
                }
            }
            return new CSSFileWrapper(cssFileProcessor.getCss(), true);
        } catch (final IOException e) {
            throw new RuntimeWorkerException(e);
        } finally {
            try {
                // Closing the buffered reader also closes the wrapped source.
                br.close();
            } catch (final IOException e) {
                throw new RuntimeWorkerException(e);
            }
        }
    }

    /**
     * Creates a css resolver that knows exactly one css file.
     *
     * @param cssFile the css file to register
     * @return a new resolver
     */
    private static StyleAttrCSSResolver newCssResolver(final CssFile cssFile) {
        CssFilesImpl cssFiles = new CssFilesImpl();
        cssFiles.add(cssFile);
        return new StyleAttrCSSResolver(cssFiles);
    }

    /**
     * Creates the html context shared by all <code>parseXHtml</code>
     * variants: unknown tags are accepted, auto bookmarking is switched on
     * and the default tag processor factory is installed.
     *
     * @param cssAppliers passed unchanged to the {@link HtmlPipelineContext}
     *            constructor, may be <code>null</code>
     * @return a new, configured context
     */
    private HtmlPipelineContext newHtmlContext(final CssAppliers cssAppliers) {
        HtmlPipelineContext hpc = new HtmlPipelineContext(cssAppliers);
        hpc.setAcceptUnknown(true).autoBookmark(true).setTagFactory(getDefaultTagProcessorFactory());
        return hpc;
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy