com.openhtmltopdf.pdfboxout.PdfBoxRenderer Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of openhtmltopdf-pdfbox Show documentation
Show all versions of openhtmltopdf-pdfbox Show documentation
Openhtmltopdf is a CSS 2.1 renderer written in Java. This artifact supports PDF output with Apache PDF-BOX 2.
The newest version!
/*
* {{{ header & license
* Copyright (c) 2006 Wisconsin Court System
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 2.1
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
* }}}
*/
package com.openhtmltopdf.pdfboxout;
import com.openhtmltopdf.bidi.BidiReorderer;
import com.openhtmltopdf.bidi.BidiSplitter;
import com.openhtmltopdf.bidi.BidiSplitterFactory;
import com.openhtmltopdf.bidi.SimpleBidiReorderer;
import com.openhtmltopdf.context.StyleReference;
import com.openhtmltopdf.css.constants.IdentValue;
import com.openhtmltopdf.css.style.CalculatedStyle;
import com.openhtmltopdf.extend.*;
import com.openhtmltopdf.layout.BoxBuilder;
import com.openhtmltopdf.layout.Layer;
import com.openhtmltopdf.layout.LayoutContext;
import com.openhtmltopdf.layout.SharedContext;
import com.openhtmltopdf.outputdevice.helper.BaseDocument;
import com.openhtmltopdf.outputdevice.helper.ExternalResourceControlPriority;
import com.openhtmltopdf.outputdevice.helper.ExternalResourceType;
import com.openhtmltopdf.extend.FSDOMMutator;
import com.openhtmltopdf.outputdevice.helper.PageDimensions;
import com.openhtmltopdf.outputdevice.helper.UnicodeImplementation;
import com.openhtmltopdf.pdfboxout.PdfBoxUtil.Metadata;
import com.openhtmltopdf.pdfboxout.PdfRendererBuilder.CacheStore;
import com.openhtmltopdf.pdfboxout.PdfRendererBuilder.PdfAConformance;
import com.openhtmltopdf.render.BlockBox;
import com.openhtmltopdf.render.Box;
import com.openhtmltopdf.render.PageBox;
import com.openhtmltopdf.render.RenderingContext;
import com.openhtmltopdf.render.ViewportBox;
import com.openhtmltopdf.render.displaylist.DisplayListCollector;
import com.openhtmltopdf.render.displaylist.DisplayListContainer;
import com.openhtmltopdf.render.displaylist.DisplayListPainter;
import com.openhtmltopdf.render.displaylist.PagedBoxCollector;
import com.openhtmltopdf.render.displaylist.DisplayListContainer.DisplayListPageContainer;
import com.openhtmltopdf.resource.XMLResource;
import com.openhtmltopdf.simple.extend.XhtmlNamespaceHandler;
import com.openhtmltopdf.util.LogMessageId;
import com.openhtmltopdf.util.OpenUtil;
import com.openhtmltopdf.util.ThreadCtx;
import com.openhtmltopdf.util.XRLog;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.pdmodel.*;
import org.apache.pdfbox.pdmodel.PDPageContentStream.AppendMode;
import org.apache.pdfbox.pdmodel.common.PDMetadata;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.documentinterchange.logicalstructure.PDMarkInfo;
import org.apache.pdfbox.pdmodel.encryption.PDEncryption;
import org.apache.pdfbox.pdmodel.graphics.color.PDOutputIntent;
import org.apache.pdfbox.pdmodel.interactive.viewerpreferences.PDViewerPreferences;
import org.apache.xmpbox.XMPMetadata;
import org.apache.xmpbox.schema.*;
import org.apache.xmpbox.type.BadFieldValueException;
import org.apache.xmpbox.xml.XmpSerializer;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.xml.sax.InputSource;
import javax.xml.transform.*;
import java.awt.*;
import java.awt.geom.Rectangle2D;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.logging.Level;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
public class PdfBoxRenderer implements Closeable, PageSupplier {
// See discussion of units at top of PdfBoxOutputDevice.
private static final float DEFAULT_DOTS_PER_POINT = 20f * 4f / 3f;
private static final int DEFAULT_DOTS_PER_PIXEL = 20;
private static final int DEFAULT_PDF_POINTS_PER_INCH = 72;

// Core collaborators, created once in the constructor.
private final SharedContext _sharedContext;
private final PdfBoxOutputDevice _outputDevice;
// DOM mutators applied to every document before it is styled (see setDocumentP).
private final List<FSDOMMutator> _domMutators;

// Source DOM and the root of the laid-out box tree (null until layout() runs).
private Document _doc;
private BlockBox _root;
private final float _dotsPerPoint;

// Target PDF document; set to null once this renderer has closed it.
private PDDocument _pdfDoc;
private PDEncryption _pdfEncryption;
private String _producer;
// Usually 1.7
private float _pdfVersion;
private PdfAConformance _pdfAConformance;
private boolean _pdfUaConformance;
private byte[] _colorProfile;
private boolean _testMode;
private PDFCreationListener _listener;
// Stream the finished PDF is saved to by createPDF().
private OutputStream _os;

// Optional plugins; null when not configured on the builder.
private SVGDrawer _svgImpl;
private SVGDrawer _mathmlImpl;
private BidiSplitterFactory _splitterFactory;
private byte _defaultTextDirection = BidiSplitter.LTR;
private BidiReorderer _reorderer;

// Creates PDPage instances; defaults to this renderer when the builder supplies none.
private PageSupplier _pageSupplier;
private final Closeable _diagnosticConsumer;
private final int _initialPageNumber;
/**
 * Builds a renderer from the builder state: creates (or adopts) the PDDocument,
 * wires up the output device, user agent, shared context, font resolver and
 * replaced-element factory, applies the unicode helpers and finally loads the
 * input document from whichever source {@code doc} carries.
 *
 * This method is constantly changing as options are added to the builder.
 *
 * @param doc the input document, given as html string, DOM, uri or file
 * @param unicode optional bidi / line-breaking / case-transform helpers
 * @param pageSize default page size passed to the shared context
 * @param state the remaining builder options
 * @param diagnosticConsumer closed in {@code cleanup()} together with the renderer
 */
PdfBoxRenderer(
        BaseDocument doc,
        UnicodeImplementation unicode,
        PageDimensions pageSize,
        PdfRendererBuilderState state,
        Closeable diagnosticConsumer) {
    PdfBoxFontResolver fontResolver = null;
    // Reuse a caller-supplied PDDocument if there is one, otherwise create our own.
    _pdfDoc = state.pddocument != null ? state.pddocument : new PDDocument();
    try {
        _diagnosticConsumer = diagnosticConsumer;
        _pdfDoc.setVersion(state._pdfVersion);
        _pdfVersion = state._pdfVersion;
        _producer = state._producer;
        // Fall back to this renderer's own requestPage implementation.
        _pageSupplier = state._pageSupplier != null ? state._pageSupplier : this;
        _svgImpl = state._svgImpl;
        _mathmlImpl = state._mathmlImpl;
        _pdfAConformance = state._pdfAConformance;
        _pdfUaConformance = state._pdfUaConform;
        _colorProfile = state._colorProfile;
        _dotsPerPoint = DEFAULT_DOTS_PER_POINT;
        _testMode = state._testMode;
        // Third argument is true for PDF/UA or a PDF/A level with conformance value "A";
        // fourth is true for any PDF/A level.
        // NOTE(review): presumably these switch on tagging / PDF/A handling - confirm
        // against PdfBoxFastOutputDevice.
        _outputDevice =
            new PdfBoxFastOutputDevice(DEFAULT_DOTS_PER_POINT, _testMode,
                state._pdfUaConform || state._pdfAConformance.getConformanceValue().equals("A"),
                state._pdfAConformance != PdfAConformance.NONE);
        _outputDevice.setWriter(_pdfDoc);
        // Start after any pages already present in a supplied document.
        _outputDevice.setStartPageNo(_pdfDoc.getNumberOfPages());
        PdfBoxUserAgent userAgent = new PdfBoxUserAgent(_outputDevice);
        if (_svgImpl != null) {
            _svgImpl.withUserAgent(userAgent);
        }
        userAgent.setProtocolsStreamFactory(state._streamFactoryMap);
        if (state._resolver != null) {
            userAgent.setUriResolver(state._resolver);
        }
        userAgent.setAccessController(ExternalResourceControlPriority.RUN_BEFORE_RESOLVING_URI, state._beforeAccessController);
        userAgent.setAccessController(ExternalResourceControlPriority.RUN_AFTER_RESOLVING_URI, state._afterAccessController);
        _sharedContext = new SharedContext();
        // Undone by removeFromThread() in cleanup().
        _sharedContext.registerWithThread();
        _sharedContext._preferredTransformerFactoryImplementationClass = state._preferredTransformerFactoryImplementationClass;
        _sharedContext._preferredDocumentBuilderFactoryImplementationClass = state._preferredDocumentBuilderFactoryImplementationClass;
        _sharedContext.setUserAgentCallback(userAgent);
        _sharedContext.setCss(new StyleReference(userAgent));
        userAgent.setSharedContext(_sharedContext);
        _outputDevice.setSharedContext(_sharedContext);
        fontResolver = new PdfBoxFontResolver(_sharedContext, _pdfDoc, state._caches.get(CacheStore.PDF_FONT_METRICS), state._pdfAConformance, state._pdfUaConform);
        _sharedContext.setFontResolver(fontResolver);
        PdfBoxReplacedElementFactory replacedElementFactory = new PdfBoxReplacedElementFactory(_outputDevice, state._svgImpl, state._objectDrawerFactory, state._mathmlImpl);
        _sharedContext.setReplacedElementFactory(replacedElementFactory);
        _sharedContext.setTextRenderer(new PdfBoxTextRenderer());
        _sharedContext.setDPI(DEFAULT_PDF_POINTS_PER_INCH * _dotsPerPoint);
        _sharedContext.setDotsPerPixel(DEFAULT_DOTS_PER_PIXEL);
        _sharedContext.setPrint(true);
        _sharedContext.setInteractive(false);
        this.getSharedContext().setDefaultPageSize(pageSize.w, pageSize.h, pageSize.isSizeInches);
        if (state._replacementText != null) {
            this.getSharedContext().setReplacementText(state._replacementText);
        }
        // Unicode helpers are all optional; only the ones supplied are installed.
        if (unicode.splitterFactory != null) {
            this._splitterFactory = unicode.splitterFactory;
        }
        if (unicode.reorderer != null) {
            this._reorderer = unicode.reorderer;
            this._outputDevice.setBidiReorderer(_reorderer);
        }
        if (unicode.lineBreaker != null) {
            _sharedContext.setLineBreaker(unicode.lineBreaker);
        }
        if (unicode.charBreaker != null) {
            _sharedContext.setCharacterBreaker(unicode.charBreaker);
        }
        if (unicode.toLowerTransformer != null) {
            _sharedContext.setUnicodeToLowerTransformer(unicode.toLowerTransformer);
        }
        if (unicode.toUpperTransformer != null) {
            _sharedContext.setUnicodeToUpperTransformer(unicode.toUpperTransformer);
        }
        if (unicode.toTitleTransformer != null) {
            _sharedContext.setUnicodeToTitleTransformer(unicode.toTitleTransformer);
        }
        this._defaultTextDirection = unicode.textDirection ? BidiSplitter.RTL : BidiSplitter.LTR;
        // Must be assigned before any setDocument*P call below, which iterates it.
        this._domMutators = state._domMutators;
        // Load the document from the first source that is set: html, DOM, uri, file.
        if (doc.html != null) {
            this.setDocumentFromStringP(doc.html, doc.baseUri);
        }
        else if (doc.document != null) {
            this.setDocumentP(doc.document, doc.baseUri);
        }
        else if (doc.uri != null) {
            this.setDocumentP(doc.uri);
        }
        else if (doc.file != null) {
            try {
                this.setDocumentP(doc.file);
            } catch (IOException e) {
                XRLog.log(Level.WARNING, LogMessageId.LogMessageId0Param.EXCEPTION_PROBLEM_TRYING_TO_READ_INPUT_XHTML_FILE, e);
                throw new RuntimeException("File IO problem", e);
            }
        }
        this._os = state._os;
        this._initialPageNumber = state._initialPageNumber;
    } catch (Throwable e) {
        if (state.pddocument == null) {
            // We created it but exceptioned out before constructor
            // finished so close here.
            OpenUtil.closeQuietly(_pdfDoc);
        }
        // fontResolver is still null here if the failure happened before it was created.
        OpenUtil.closeQuietly(fontResolver);
        throw e;
    }
}
/**
 * Returns the DOM document most recently set on this renderer,
 * or null if no document has been set.
 */
public Document getDocument() {
    return this._doc;
}
/**
 * Gives access to the underlying PDF-BOX document.
 *
 * @return the PDDocument, or null if it has been closed
 */
public PDDocument getPdfDocument() {
    return this._pdfDoc;
}
/**
 * Returns the PDF-BOX font resolver registered on the shared context.
 * Can be used to add fonts in code.
 */
public PdfBoxFontResolver getFontResolver() {
    Object resolver = _sharedContext.getFontResolver();
    return (PdfBoxFontResolver) resolver;
}
/**
 * Fetches the given uri as an XHTML resource through the shared context's
 * user agent and returns its DOM.
 */
private Document loadDocument(String uri) {
    return _sharedContext
            .getUserAgentCallback()
            .getXMLResource(uri, ExternalResourceType.XML_XHTML)
            .getDocument();
}
/**
 * Loads the document at the given uri and uses that same uri as its base url.
 */
private void setDocumentP(String uri) {
    Document loaded = loadDocument(uri);
    setDocumentP(loaded, uri);
}
/**
 * Sets the document using a fresh XhtmlNamespaceHandler.
 */
private void setDocumentP(Document doc, String url) {
    NamespaceHandler xhtmlHandler = new XhtmlNamespaceHandler();
    setDocumentP(doc, url, xhtmlHandler);
}
/**
 * Loads the document from a file. The base url is the url of the file's
 * parent directory, or the empty string when the file has no parent.
 *
 * @throws IOException if a file url cannot be formed
 */
private void setDocumentP(File file) throws IOException {
    File parentDir = file.getAbsoluteFile().getParentFile();
    // Load first, then resolve the base url - same evaluation order as a nested call.
    Document loaded = loadDocument(file.toURI().toURL().toExternalForm());
    String baseUrl = "";
    if (parentDir != null) {
        baseUrl = parentDir.toURI().toURL().toExternalForm();
    }
    setDocumentP(loaded, baseUrl);
}
/**
 * Parses the given markup string into a DOM and sets it as the document.
 */
private void setDocumentFromStringP(String content, String baseUrl) {
    Reader markupReader = new BufferedReader(new StringReader(content));
    InputSource source = new InputSource(markupReader);
    Document parsed = XMLResource.load(source).getDocument();
    setDocumentP(parsed, baseUrl);
}
/**
 * Installs the document on this renderer: runs the DOM mutators, records the
 * base url and namespace handler, hands the document to the style reference
 * and imports its font-face rules into the font resolver and, when present,
 * the SVG and MathML plugins.
 */
private void setDocumentP(Document doc, String url, NamespaceHandler nsh) {
    _doc = doc;

    // Apply potential DOM mutations
    for (FSDOMMutator mutator : _domMutators) {
        mutator.mutateDocument(doc);
    }

    _sharedContext.setBaseURL(url);
    _sharedContext.setNamespaceHandler(nsh);
    _sharedContext.getCss().setDocumentContext(
            _sharedContext, _sharedContext.getNamespaceHandler(), doc, new NullUserInterface());

    getFontResolver().importFontFaces(_sharedContext.getCss().getFontFaceRules());

    if (_svgImpl != null) {
        _svgImpl.importFontFaceRules(_sharedContext.getCss().getFontFaceRules(), _sharedContext);
    }

    if (_mathmlImpl != null) {
        _mathmlImpl.importFontFaceRules(_sharedContext.getCss().getFontFaceRules(), _sharedContext);
    }
}
/**
 * Returns the configured PDF version, or 1.7 when none was configured
 * (stored value of zero).
 */
public float getPDFVersion() {
    if (_pdfVersion == 0f) {
        return 1.7f;
    }
    return _pdfVersion;
}
/**
 * Builds the box tree for the current document, lays it out inside a viewport
 * sized from the first page, trims empty pages, lays out the pages and stores
 * the resulting root box.
 */
public void layout() {
    LayoutContext layoutCtx = newLayoutContext();
    BlockBox rootBox = BoxBuilder.createRootBox(layoutCtx, _doc);

    Box viewportBox = new ViewportBox(getInitialExtents(layoutCtx));
    rootBox.setContainingBlock(viewportBox);
    rootBox.layout(layoutCtx);

    // Useful to see the box tree after layout.
    // System.out.println(com.openhtmltopdf.util.LambdaUtil.descendantDump(root));

    Layer rootLayer = rootBox.getLayer();
    Dimension paintingDim = rootLayer.getPaintingDimension(layoutCtx);
    rootLayer.trimEmptyPages(layoutCtx, paintingDim.height);
    rootLayer.layoutPages(layoutCtx);

    _root = rootBox;
}
/**
 * Returns a rectangle at the origin with the content width and height of a
 * page box created for the "first" pseudo page.
 */
private Rectangle getInitialExtents(LayoutContext c) {
    PageBox firstPage = Layer.createPageBox(c, "first");
    int contentWidth = firstPage.getContentWidth(c);
    int contentHeight = firstPage.getContentHeight(c);
    return new Rectangle(0, 0, contentWidth, contentHeight);
}
/**
 * Creates a rendering context bound to the output device, the optional bidi
 * reorderer and the root layer. Requires that layout has produced a root box.
 */
private RenderingContext newRenderingContext() {
    RenderingContext renderCtx = _sharedContext.newRenderingContextInstance();
    renderCtx.setFontContext(new PdfBoxFontContext());
    renderCtx.setOutputDevice(_outputDevice);

    if (_reorderer != null) {
        renderCtx.setBidiReorderer(_reorderer);
    }

    _outputDevice.setRenderingContext(renderCtx);
    _sharedContext.getTextRenderer().setup(renderCtx.getFontContext());
    renderCtx.setRootLayer(_root.getLayer());

    return renderCtx;
}
/**
 * Creates a layout context configured with the optional bidi helpers and the
 * default text direction, and sets up the text renderer with it. A
 * SimpleBidiReorderer is used when no reorderer was configured.
 */
private LayoutContext newLayoutContext() {
    LayoutContext layoutCtx = _sharedContext.newLayoutContextInstance();
    layoutCtx.setFontContext(new PdfBoxFontContext());

    if (_splitterFactory != null) {
        layoutCtx.setBidiSplitterFactory(_splitterFactory);
    }

    if (_reorderer != null) {
        layoutCtx.setBidiReorderer(_reorderer);
    }

    layoutCtx.setDefaultTextDirection(_defaultTextDirection);

    PdfBoxTextRenderer textRenderer = (PdfBoxTextRenderer) _sharedContext.getTextRenderer();
    textRenderer.setup(
            layoutCtx.getFontContext(),
            _reorderer != null ? _reorderer : new SimpleBidiReorderer());

    return layoutCtx;
}
/**
 * Creates a PDF with the setup specified by the builder. On finishing or
 * failing, saves (only if successful) and closes the PDF document.
 *
 * @throws IOException if writing the PDF fails
 */
public void createPDF() throws IOException {
    final boolean saveAndClose = true;
    createPdfFast(saveAndClose, 0);
}
/**
 * Creates a PDF with the setup specified by the builder.
 * On finishing or failing, DOES NOT save or close the PDF document.
 * Useful for post-processing the PDDocument, which can be retrieved with
 * {@link #getPdfDocument()}.
 *
 * Prefer {@link #createPDFKeepOpen()} with try-with-resources.
 *
 * @throws IOException if writing the PDF fails
 */
public void createPDFWithoutClosing() throws IOException {
    final boolean saveAndClose = false;
    createPdfFast(saveAndClose, 0);
}
/**
 * Creates a PDF with the setup specified by the builder and returns the
 * still-open document for post-processing.
 * On finishing or failing, DOES NOT save or close the PDF document.
 *
 * NOTE: It is recommended to use this method with try-with-resources
 * to avoid leaving the PDDocument open.
 *
 * @return the PDDocument that was written to
 * @throws IOException if writing the PDF fails
 */
public PDDocument createPDFKeepOpen() throws IOException {
    this.createPDFWithoutClosing();
    PDDocument openDocument = getPdfDocument();
    return openDocument;
}
/**
 * Fires the listener's onClose callback and closes the PDF document without
 * saving it. Does nothing if the document has already been closed.
 *
 * After this call {@link #getPdfDocument()} returns null, matching the
 * behaviour of {@link #createPDF()}; repeated calls are therefore harmless.
 *
 * @deprecated consider {@link #createPDFKeepOpen()} with try-with-resources instead
 */
@Deprecated
public void finishPDF() throws IOException {
    if (_pdfDoc == null) {
        return;
    }

    try {
        fireOnClose();
    } finally {
        // Close even if the listener throws, and drop the reference so the
        // closed document is never handed out or closed a second time.
        OpenUtil.closeQuietly(_pdfDoc);
        _pdfDoc = null;
    }
}
/**
 * Runs layout if needed and writes all pages to the PDF document.
 *
 * @param finish if true, the onClose listener is fired, the document is saved
 *        to the output stream (only when rendering succeeded) and then closed
 *        and dropped; if false the document is left open and unsaved
 * @param initialPageNo initial page number for the rendering context; zero
 *        means use the value configured on the builder
 * @throws IOException if writing or saving the PDF fails
 */
private void createPdfFast(boolean finish, int initialPageNo) throws IOException {
    boolean success = false;

    try {
        XRLog.log(Level.INFO, LogMessageId.LogMessageId0Param.GENERAL_PDF_USING_FAST_MODE);

        // renders the layout if it wasn't created
        if (_root == null) {
            this.layout();
        }

        List<PageBox> pages = _root.getLayer().getPages();

        RenderingContext c = newRenderingContext();
        c.setInitialPageNo(initialPageNo != 0 ? initialPageNo : _initialPageNumber);
        c.setFastRenderer(true);

        // Layout units are converted to PDF points by dividing by dots-per-point.
        PageBox firstPage = pages.get(0);
        Rectangle2D firstPageSize = new Rectangle2D.Float(0, 0,
                firstPage.getWidth(c) / _dotsPerPoint,
                firstPage.getHeight(c) / _dotsPerPoint);

        if (_pdfEncryption != null) {
            _pdfDoc.setEncryptionDictionary(_pdfEncryption);
        }

        firePreOpen();
        writePDFFast(pages, c, firstPageSize, _pdfDoc);
        success = true;
    } finally {
        if (finish) {
            try {
                fireOnClose();
                // Never save a partially rendered document.
                if (success) {
                    _pdfDoc.save(_os);
                }
            } finally {
                OpenUtil.closeQuietly(_pdfDoc);
                _pdfDoc = null;
            }
        }
    }
}
/**
 * Notifies the creation listener, if one is set, before pages are written.
 */
private void firePreOpen() {
    if (_listener == null) {
        return;
    }
    _listener.preOpen(this);
}
/**
 * Notifies the creation listener, if one is set, passing the page count.
 */
private void firePreWrite(int pageCount) {
    if (_listener == null) {
        return;
    }
    _listener.preWrite(this, pageCount);
}
/**
 * Notifies the creation listener, if one is set, that the document is closing.
 */
private void fireOnClose() {
    if (_listener == null) {
        return;
    }
    _listener.onClose(this);
}
/**
 * Requests a page from the page supplier, opens a content stream on it in
 * append mode (compressed unless in test mode) and initializes the output
 * device for that page.
 *
 * @return the open content stream; the caller is responsible for closing it
 */
private PDPageContentStream initPage(
        PDDocument doc, float w, float h, int mainPageIndex, int shadowPageIndex) throws IOException {
    PDPage pdPage = _pageSupplier.requestPage(doc, w, h, mainPageIndex, shadowPageIndex);
    boolean compress = !_testMode;
    PDPageContentStream stream = new PDPageContentStream(doc, pdPage, AppendMode.APPEND, compress);
    _outputDevice.initializePage(stream, pdPage, h);
    return stream;
}
/**
 * Paints every laid-out page, and any shadow pages it has, into the PDF document.
 *
 * On the first page the preWrite listener is fired, the document information
 * dictionary is filled in, PDF/A or PDF/UA metadata is added when requested,
 * and the display list for the whole document is collected.
 *
 * @param pages the laid-out pages of the root layer
 * @param c the rendering context
 * @param firstPageSize size of the first page in PDF points
 * @param doc the target document
 * @throws IOException if a page content stream cannot be written
 */
private void writePDFFast(
        List<PageBox> pages,
        RenderingContext c,
        Rectangle2D firstPageSize,
        PDDocument doc) throws IOException {
    _outputDevice.setRoot(_root);
    _outputDevice.start(_doc);

    _root.getLayer().assignPagePaintingPositions(c, Layer.PAGED_MODE_PRINT);

    int pageCount = _root.getLayer().getPages().size();
    c.setPageCount(pageCount);

    // Index of the next PDF page; runs ahead of i once shadow pages are inserted.
    int pdfPageIndex = 0;
    DisplayListContainer dlPages = null;

    for (int i = 0; i < pageCount; i++) {
        float nextW;
        float nextH;
        PageBox page = pages.get(i);

        if (i == 0) {
            nextW = (float) firstPageSize.getWidth();
            nextH = (float) firstPageSize.getHeight();
        } else {
            nextW = page.getWidth(c) / _dotsPerPoint;
            nextH = page.getHeight(c) / _dotsPerPoint;
        }

        DisplayListPageContainer pageOperations;

        try (PDPageContentStream cs = initPage(doc, nextW, nextH, i, -1)) {
            if (i == 0) {
                firePreWrite(pageCount); // opportunity to adjust meta data
                setDidValues(doc); // set PDF header fields from meta data

                if (_pdfUaConformance || _pdfAConformance != PdfAConformance.NONE) {
                    addPdfASchema(doc, _pdfAConformance, _pdfUaConformance);
                }

                DisplayListCollector dlCollector = new DisplayListCollector(_root.getLayer().getPages());
                dlPages = dlCollector.collectRoot(c, _root.getLayer());
            }

            page.setBasePagePdfPageIndex(pdfPageIndex);
            pageOperations = dlPages.getPageInstructions(i);
            c.setPage(i, page);
            c.setShadowPageNumber(-1);
            paintPageFast(c, page, pageOperations, 0);
            _outputDevice.finishPage();
        }

        pdfPageIndex++;

        if (!pageOperations.shadowPages().isEmpty()) {
            // NOTE(review): the main page is requested with layout index i, but the
            // shadow pages receive the already incremented pdf page index as their
            // main page index - confirm this asymmetry is intended.
            paintShadowPages(
                    c, doc, pdfPageIndex, page, pageOperations.shadowPages());
            pdfPageIndex += pageOperations.shadowPages().size();
        }
    }

    _outputDevice.finish(c, _root);
}
/**
 * Shadow pages are an opt-in feature that allows cut off content beyond
 * the right edge (or left edge for RTL mode) of the main page to be
 * output as a series of shadow pages.
 *
 * It may be useful for example for large tables.
 *
 * Each shadow page has the size of the main page and is painted with the
 * content shifted by one more page content width than the previous one.
 *
 * @param c the rendering context
 * @param doc the target document
 * @param mainPageIndex page number passed on to the page supplier
 * @param currentPage the main page the shadow pages belong to
 * @param shadows display lists of the shadow pages, in output order
 * @throws IOException if a page content stream cannot be written
 */
private void paintShadowPages(
        RenderingContext c,
        PDDocument doc,
        int mainPageIndex,
        PageBox currentPage,
        List<DisplayListPageContainer> shadows) throws IOException {
    int count = shadows.size();
    currentPage.setShadowPageCount(count);

    int pageContentWidth = currentPage.getContentWidth(c);
    // Positive step for LTR cut-off direction, negative otherwise.
    int translateIncrement = pageContentWidth * (currentPage.getCutOffPageDirection() == IdentValue.LTR ? 1 : -1);
    int translateX = translateIncrement;

    for (int i = 0; i < count; i++) {
        DisplayListPageContainer shadow = shadows.get(i);

        float shadowWidth = currentPage.getWidth(c) / _dotsPerPoint;
        float shadowHeight = currentPage.getHeight(c) / _dotsPerPoint;

        PDPage shadowPdPage =
                _pageSupplier.requestPage(doc, shadowWidth, shadowHeight, mainPageIndex, i);

        try (PDPageContentStream shadowCs = new PDPageContentStream(doc, shadowPdPage, AppendMode.APPEND, !_testMode)) {
            _outputDevice.initializePage(shadowCs, shadowPdPage, shadowHeight);
            c.setShadowPageNumber(i);
            paintPageFast(c, currentPage, shadow, -translateX);
            _outputDevice.finishPage();
        }

        translateX += translateIncrement;
    }
}
// Kindly provided by GurpusMaximus at:
// https://stackoverflow.com/questions/49682339/how-can-i-create-an-accessible-pdf-with-java-pdfbox-2-0-8-library-that-is-also-v
/**
 * Adds XMP metadata, mark info, language, viewer preferences and (when a
 * color profile is configured) an output intent to the document catalog,
 * as needed for PDF/A and PDF/UA output.
 *
 * The XMP values are copied from the document information dictionary, so
 * that dictionary must already be populated when this method is called.
 *
 * @param document the target document
 * @param pdfAConformance requested PDF/A level, or NONE
 * @param isPdfUa whether PDF/UA output was requested
 * @throws RuntimeException wrapping any XMP, IO or transformer failure
 */
private void addPdfASchema(PDDocument document, PdfAConformance pdfAConformance, boolean isPdfUa) {
    PDDocumentInformation information = document.getDocumentInformation();
    XMPMetadata metadata = XMPMetadata.createXMPMetadata();

    try {
        // NOTE: These XMP metadata MUST match up with the document information dictionary
        // to be a valid PDF/A document, As per ISO 19005-1:2005/Cor.1:2007, 6.7.2
        String title = information.getTitle();
        String author = information.getAuthor();
        String subject = information.getSubject();
        String keywords = information.getKeywords();
        String creator = information.getCreator();
        String producer = information.getProducer();
        Calendar creationDate = information.getCreationDate();
        Calendar modDate = information.getModificationDate();

        if (isPdfUa && (title == null || title.isEmpty())) {
            XRLog.log(Level.WARNING, LogMessageId.LogMessageId0Param.GENERAL_PDF_ACCESSIBILITY_NO_DOCUMENT_TITLE_PROVIDED);
        }

        if (pdfAConformance != PdfAConformance.NONE) {
            PDFAIdentificationSchema pdfaid = metadata.createAndAddPDFAIdentificationSchema();
            pdfaid.setConformance(pdfAConformance.getConformanceValue());
            pdfaid.setPart(pdfAConformance.getPart());

            AdobePDFSchema pdfSchema = metadata.createAndAddAdobePDFSchema();
            pdfSchema.setPDFVersion(String.valueOf(pdfAConformance.getPdfVersion()));
            if (keywords != null) {
                pdfSchema.setKeywords(keywords);
            }
            if (producer != null) {
                pdfSchema.setProducer(producer);
            }

            XMPBasicSchema xmpBasicSchema = metadata.createAndAddXMPBasicSchema();
            if (creator != null) {
                xmpBasicSchema.setCreatorTool(creator);
            }
            if (creationDate != null) {
                xmpBasicSchema.setCreateDate(creationDate);
            }
            if (modDate != null) {
                xmpBasicSchema.setModifyDate(modDate);
            }
        }

        DublinCoreSchema dc = metadata.createAndAddDublinCoreSchema();
        dc.setFormat("application/pdf");
        if (author != null) {
            dc.addCreator(author);
        }
        if (title != null) {
            dc.setTitle(title);
        }
        if (subject != null) {
            dc.setDescription(subject);
        } else if (isPdfUa) {
            XRLog.log(Level.WARNING,
                    LogMessageId.LogMessageId0Param.GENERAL_PDF_ACCESSIBILITY_NO_DOCUMENT_DESCRIPTION_PROVIDED);
        }

        PDFAExtensionSchema pdfAExt = metadata.createAndAddPDFAExtensionSchemaWithDefaultNS();
        pdfAExt.addNamespace("http://www.aiim.org/pdfa/ns/extension/", "pdfaExtension");
        pdfAExt.addNamespace("http://www.aiim.org/pdfa/ns/schema#", "pdfaSchema");
        pdfAExt.addNamespace("http://www.aiim.org/pdfa/ns/property#", "pdfaProperty");

        if (pdfAConformance != PdfAConformance.NONE) {
            // Description of Adobe PDF Schema
            List<XMPSchema> pdfProperties = new ArrayList<>(3);
            pdfProperties.add(
                    createPdfaProperty("internal", "The PDF file version.", "PDFVersion", "Text"));
            pdfProperties.add(
                    createPdfaProperty("external", "Keywords.", "Keywords", "Text"));
            pdfProperties.add(
                    createPdfaProperty("internal", "The name of the tool that created the PDF document.", "Producer", "AgentName"));
            pdfAExt.addBagValue("schemas",
                    createPdfaSchema("Adobe PDF Schema", "http://ns.adobe.com/pdf/1.3/", "pdf", pdfProperties));

            // Description of PDF/A ID Schema
            List<XMPSchema> pdfaidProperties = new ArrayList<>(2);
            pdfaidProperties.add(
                    createPdfaProperty("internal", "Part of PDF/A standard", "part", "Integer"));
            pdfaidProperties.add(
                    createPdfaProperty("internal", "Conformance level of PDF/A standard", "conformance", "Text"));
            pdfAExt.addBagValue("schemas",
                    createPdfaSchema("PDF/A ID Schema", "http://www.aiim.org/pdfa/ns/id/", "pdfaid", pdfaidProperties));
        }

        if (isPdfUa) {
            // Description of PDF/UA
            List<XMPSchema> pdfUaProperties = new ArrayList<>(1);
            pdfUaProperties.add(
                    createPdfaProperty("internal", "Indicates, which part of ISO 14289 standard is followed", "part", "Integer"));
            XMPSchema pdfUa = createPdfaSchema("PDF/UA Universal Accessibility Schema", "http://www.aiim.org/pdfua/ns/id/", "pdfuaid" , pdfUaProperties);
            pdfAExt.addBagValue("schemas", pdfUa);
            pdfAExt.addNamespace("http://www.aiim.org/pdfua/ns/id/", "pdfuaid");
            // Switch the prefix so the "part" property below is written as pdfuaid:part.
            pdfAExt.setPrefix("pdfuaid");
            pdfAExt.setTextPropertyValue("part", "1");
        }

        PDMetadata metadataStream = new PDMetadata(document);
        PDMarkInfo markInfo = new PDMarkInfo();
        markInfo.setMarked(true);

        // add to catalog
        PDDocumentCatalog catalog = document.getDocumentCatalog();
        catalog.setMetadata(metadataStream);
        catalog.setMarkInfo(markInfo);

        // Document language from the root element's lang attribute, defaulting to EN-US.
        String lang = _doc.getDocumentElement().getAttribute("lang");
        catalog.setLanguage(!lang.isEmpty() ? lang : "EN-US");
        catalog.setViewerPreferences(new PDViewerPreferences(new COSDictionary()));
        catalog.getViewerPreferences().setDisplayDocTitle(true);

        XmpSerializer serializer = new XmpSerializer();
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        serializer.serialize(metadata, baos, true);
        String xmp = baos.toString("UTF-8");
        // Fix for bad XML generation by some transformers
        xmp = xmp.replace(" lang=\"x-default\"", " xml:lang=\"x-default\"");
        metadataStream.importXMPMetadata(xmp.getBytes(StandardCharsets.UTF_8));

        if (_colorProfile != null) {
            ByteArrayInputStream colorProfile = new ByteArrayInputStream(_colorProfile);
            PDOutputIntent oi = new PDOutputIntent(document, colorProfile);
            oi.setInfo("sRGB IEC61966-2.1");
            oi.setOutputCondition("sRGB IEC61966-2.1");
            oi.setOutputConditionIdentifier("sRGB IEC61966-2.1");
            oi.setRegistryName("http://www.color.org");
            catalog.addOutputIntent(oi);
        }
    } catch (BadFieldValueException | IOException | TransformerException e) {
        throw new RuntimeException(e);
    }
}
/**
 * Creates an XML Schema description to be used in the PDFA Extension.
 *
 * @param schema human readable schema name
 * @param namespace namespace uri of the described schema
 * @param prefix preferred prefix of the described schema
 * @param properties property descriptions, added in order as a sequence
 * @return the schema description
 */
private XMPSchema createPdfaSchema(String schema, String namespace, String prefix, List<XMPSchema> properties) {
    XMPSchema xmpSchema = new XMPSchema(XMPMetadata.createXMPMetadata(),
            "pdfaSchema", "pdfaSchema", "pdfaSchema");
    xmpSchema.setTextPropertyValue("schema", schema);
    xmpSchema.setTextPropertyValue("namespaceURI", namespace);
    xmpSchema.setTextPropertyValue("prefix", prefix);

    for (XMPSchema property : properties) {
        xmpSchema.addUnqualifiedSequenceValue("property", property);
    }

    return xmpSchema;
}
/**
 * Creates an XML Property description to be used in the PDFA Extension.
 * The text properties are written in the order name, valueType, category,
 * description.
 */
private XMPSchema createPdfaProperty(String category, String description, String name, String valueType) {
    final String typeName = "pdfaProperty";
    XMPSchema propertySchema =
            new XMPSchema(XMPMetadata.createXMPMetadata(), typeName, typeName, typeName);

    propertySchema.setTextPropertyValue("name", name);
    propertySchema.setTextPropertyValue("valueType", valueType);
    propertySchema.setTextPropertyValue("category", category);
    propertySchema.setTextPropertyValue("description", description);

    return propertySchema;
}
/**
 * Sets the document information dictionary values from html metadata.
 *
 * The creation date is the current time and the producer is the configured
 * one or "openhtmltopdf.com". Metadata entries with an empty name or null
 * content are skipped; title, author, subject and keywords map to their
 * dedicated fields and everything else becomes a custom metadata value.
 */
private void setDidValues(PDDocument doc) {
    PDDocumentInformation docInfo = new PDDocumentInformation();
    docInfo.setCreationDate(Calendar.getInstance());
    docInfo.setProducer(_producer != null ? _producer : "openhtmltopdf.com");

    for (Metadata entry : _outputDevice.getMetadata()) {
        String key = entry.getName();
        if (key.isEmpty()) {
            continue;
        }

        String value = entry.getContent();
        if (value == null) {
            continue;
        }

        switch (key) {
            case "title":
                docInfo.setTitle(value);
                break;
            case "author":
                docInfo.setAuthor(value);
                break;
            case "subject":
                docInfo.setSubject(value);
                break;
            case "keywords":
                docInfo.setKeywords(value);
                break;
            default:
                docInfo.setCustomMetadataValue(key, value);
                break;
        }
    }

    doc.setDocumentInformation(docInfo);
}
/**
 * Paints one page: background, margin areas and border first, then the
 * page's display list, clipped to the print clipping bounds and translated
 * into the content area.
 *
 * @param additionalTranslateX extra horizontal shift added to the left
 *        margin/border/padding offset (zero for main pages)
 */
private void paintPageFast(RenderingContext c, PageBox page, DisplayListPageContainer pageOperations, int additionalTranslateX) {
    page.paintBackground(c, 0, Layer.PAGED_MODE_PRINT);

    c.setInPageMargins(true);
    page.paintMarginAreas(c, 0, Layer.PAGED_MODE_PRINT);
    c.setInPageMargins(false);

    page.paintBorder(c, 0, Layer.PAGED_MODE_PRINT);

    Rectangle clipBounds = page.getPrintClippingBounds(c);
    _outputDevice.pushClip(clipBounds);

    int offsetY = -page.getPaintingTop() + page.getMarginBorderPadding(c, CalculatedStyle.TOP);
    int leftInset = page.getMarginBorderPadding(c, CalculatedStyle.LEFT);
    int offsetX = leftInset + additionalTranslateX;

    _outputDevice.translate(offsetX, offsetY);
    new DisplayListPainter().paint(c, pageOperations);
    // Undo the translation so the next page starts from the origin.
    _outputDevice.translate(-offsetX, -offsetY);

    _outputDevice.popClip();
}
/**
 * Returns the output device this renderer paints to.
 */
public PdfBoxOutputDevice getOutputDevice() {
    return this._outputDevice;
}
/**
 * Returns the shared context created for this renderer.
 */
public SharedContext getSharedContext() {
    return this._sharedContext;
}
/**
 * Returns the root box of the laid-out document, or null if
 * {@link #layout()} has not run yet.
 */
public BlockBox getRootBox() {
    return this._root;
}
/**
 * Returns the number of layout dots per PDF point used by this renderer.
 */
public float getDotsPerPoint() {
    return this._dotsPerPoint;
}
public List> findPagePositionsByID(Pattern pattern) {
return _outputDevice.findPagePositionsByID(newLayoutContext(), pattern);
}
/**
 * User interface stub for non-interactive output: no element is ever
 * reported as hovered, active or focused.
 */
private static final class NullUserInterface implements UserInterface {
    /** Always false. */
    @Override
    public boolean isHover(Element e) {
        return false;
    }

    /** Always false. */
    @Override
    public boolean isActive(Element e) {
        return false;
    }

    /** Always false. */
    @Override
    public boolean isFocus(Element e) {
        return false;
    }
}
/**
 * Returns the PDF creation listener, or null if none is set.
 */
public PDFCreationListener getListener() {
    return this._listener;
}
/**
 * Sets the listener notified at the preOpen, preWrite and onClose stages
 * of PDF creation. Pass null to remove it.
 */
public void setListener(PDFCreationListener listener) {
    this._listener = listener;
}
/**
 * Releases everything held by this renderer, quietly: the output device,
 * the thread registration of the shared context, the diagnostic consumer,
 * the thread context, the font resolver and the optional SVG and MathML
 * plugins.
 */
private void cleanup() {
    OpenUtil.closeQuietly(_outputDevice);
    OpenUtil.tryQuietly(_sharedContext::removeFromThread);
    OpenUtil.closeQuietly(_diagnosticConsumer);
    OpenUtil.tryQuietly(ThreadCtx::cleanup);

    // Close all still open font files
    PdfBoxFontResolver fontResolver = getFontResolver();
    OpenUtil.closeQuietly(fontResolver);

    if (_svgImpl != null) {
        OpenUtil.closeQuietly(_svgImpl);
    }

    if (_mathmlImpl != null) {
        OpenUtil.closeQuietly(_mathmlImpl);
    }
}
/**
 * Cleans up thread resources and closes the resources held by this renderer.
 * MUST be called after finishing with the renderer.
 */
@Override
public void close() {
    cleanup();
}
/**
 * Default page supplier: creates a new page of the requested size and
 * appends it to the document. The page numbers are not used.
 */
@Override
public PDPage requestPage(PDDocument doc, float pageWidth, float pageHeight, int pageNumber, int shadowPageNumber) {
    PDRectangle mediaBox = new PDRectangle(pageWidth, pageHeight);
    PDPage newPage = new PDPage(mediaBox);
    doc.addPage(newPage);
    return newPage;
}
/**
* Start page to end page and then top to bottom on page.
*/
private final Comparator> PAGE_POSITION_COMPARATOR =
Comparator.comparingInt(PagePosition>::getPageNo)
.thenComparing(Comparator.comparingDouble(PagePosition>::getY).reversed());
/**
 * Returns the bottom Y position in bottom-up PDF units
 * on the last page of content, or zero if there are no layer positions.
 *
 * WARNING: NOT transform aware.
 */
public float getLastContentBottom() {
    List<PagePosition<Layer>> positions = getLayersPositions();

    if (positions.isEmpty()) {
        return 0;
    }

    // Positions are sorted, so the last entry is the lowest one on the last page.
    return positions.get(positions.size() - 1).getY();
}
/**
* Returns a list of page positions for all layers in the document.
* The page positions are sorted from first page to last and then top to bottom.
* The page position values are in bottom-up PDF units.
*
* WARNING: NOT transform aware. Transformed layers will return page
* positions that are not correct.
*/
public List> getLayersPositions() {
if (getRootBox() == null) {
this.layout();
}
Layer rootLayer = getRootBox().getLayer();
int[] whiches = new int[] { Layer.NEGATIVE, Layer.AUTO, Layer.ZERO, Layer.POSITIVE };
List layers =
Arrays.stream(whiches)
.mapToObj(rootLayer::collectLayers)
.flatMap(List::stream)
.collect(Collectors.toList());
RenderingContext ctx = newRenderingContext();
List pages = rootLayer.getPages();
List> ret = new ArrayList<>();
ret.addAll(getLayerPagePositions(rootLayer, pages, ctx));
layers.stream()
.map(layer -> getLayerPagePositions(layer, pages, ctx))
.forEach(ret::addAll);
Collections.sort(ret, PAGE_POSITION_COMPARATOR);
return ret;
}
/**
* Returns a list of page positions for a single layer.
* The page positions are sorted from first page to last and then top to bottom.
* The page position values are in bottom-up PDF units.
*
* Compare to {@link #getLayersPositions()} which will return page
* positions for all layers.
*
* WARNING: NOT transform aware. A transformed layer will return page
* positions that are not correct.
*/
public List> getLayerPositions(Layer layer) {
RenderingContext ctx = newRenderingContext();
List pages = layer.getPages();
List> ret = getLayerPagePositions(layer, pages, ctx);
Collections.sort(ret, PAGE_POSITION_COMPARATOR);
return ret;
}
private List> getLayerPagePositions(
Layer layer, List pages, RenderingContext ctx) {
// FIXME: This method is not transform aware.
Box box = layer.getMaster();
int start = findStartPage(ctx, layer, pages);
int end = findEndPage(ctx, layer, pages);
if (box.getStyle().isFixed()) {
PageBox page = pages.get(start);
float x = box.getAbsX() + page.getMarginBorderPadding(ctx, CalculatedStyle.LEFT);
float w = box.getEffectiveWidth();
float y = page.getMarginBorderPadding(ctx, CalculatedStyle.BOTTOM) +
(page.getPaintingBottom() - box.getAbsY() - box.getHeight());
float h = box.getHeight();
return IntStream.range(0, pages.size())
.mapToObj(pageNo -> createPagePosition(null, layer, pageNo, x, y, w, h))
.collect(Collectors.toList());
}
List> ret = new ArrayList<>((end - start) + 1);
for (int i = start; i <= end; i++) {
PageBox page = pages.get(i);
float x = box.getAbsX() + page.getMarginBorderPadding(ctx, CalculatedStyle.LEFT);
float w = box.getEffectiveWidth();
float y;
float h;
if (start != end) {
if (i != start && i != end) {
y = page.getMarginBorderPadding(ctx, CalculatedStyle.BOTTOM);
h = page.getContentHeight(ctx);
} else if (i == end) {
h = (box.getAbsY() + box.getHeight()) - page.getPaintingTop();
y = page.getMarginBorderPadding(ctx, CalculatedStyle.BOTTOM) +
page.getContentHeight(ctx) - h;
} else {
assert i == start;
y = page.getMarginBorderPadding(ctx, CalculatedStyle.BOTTOM);
h = page.getPaintingBottom() - box.getAbsY();
}
} else {
y = page.getMarginBorderPadding(ctx, CalculatedStyle.BOTTOM) +
(page.getPaintingBottom() - box.getAbsY() - box.getHeight());
h = box.getHeight();
}
PagePosition pos = createPagePosition(null, layer, i, x, y, w, h);
ret.add(pos);
}
return ret;
}
/**
 * Creates a page position for the given element, converting x, y, w and h
 * from layout dots to PDF points.
 *
 * @param <T> type of the positioned element
 */
private <T> PagePosition<T> createPagePosition(
        String id, T element, int pageNo, float x, float y, float w, float h) {
    return new PagePosition<>(
            id, element, pageNo, x / _dotsPerPoint, y / _dotsPerPoint, w / _dotsPerPoint, h / _dotsPerPoint);
}
/**
 * Returns the start page for a layer. Transform aware.
 * This is the smallest start page of the layer's master box and its floats.
 */
private int findStartPage(RenderingContext c, Layer layer, List<PageBox> pages) {
    int start = PagedBoxCollector.findStartPage(c, layer.getMaster(), pages);

    // Floats may be outside the master box.
    for (BlockBox floater : layer.getFloats()) {
        start = Math.min(start, PagedBoxCollector.findStartPage(c, floater, pages));
    }

    return start;
}
/**
 * Returns the end page number for a layer. Transform aware.
 * This is the largest end page of the layer's master box and its floats.
 */
private int findEndPage(RenderingContext c, Layer layer, List<PageBox> pages) {
    int end = PagedBoxCollector.findEndPage(c, layer.getMaster(), pages);

    // Floats may be outside the master box.
    for (BlockBox floater : layer.getFloats()) {
        end = Math.max(end, PagedBoxCollector.findEndPage(c, floater, pages));
    }

    return end;
}
}