
// com.itextpdf.pdfcleanup.PdfCleanUpTool Maven / Gradle / Ivy
// The newest version!
/*
This file is part of the iText (R) project.
Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
AGPL licensing:
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package com.itextpdf.pdfcleanup;
import com.itextpdf.commons.actions.EventManager;
import com.itextpdf.io.source.PdfTokenizer;
import com.itextpdf.io.source.RandomAccessFileOrArray;
import com.itextpdf.io.source.RandomAccessSourceFactory;
import com.itextpdf.kernel.exceptions.PdfException;
import com.itextpdf.kernel.colors.Color;
import com.itextpdf.kernel.colors.DeviceCmyk;
import com.itextpdf.kernel.colors.DeviceGray;
import com.itextpdf.kernel.colors.DeviceRgb;
import com.itextpdf.kernel.font.PdfFont;
import com.itextpdf.kernel.font.PdfFontFactory;
import com.itextpdf.kernel.geom.Rectangle;
import com.itextpdf.kernel.pdf.PdfArray;
import com.itextpdf.kernel.pdf.PdfBoolean;
import com.itextpdf.kernel.pdf.PdfDictionary;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfName;
import com.itextpdf.kernel.pdf.PdfNumber;
import com.itextpdf.kernel.pdf.PdfPage;
import com.itextpdf.kernel.pdf.PdfStream;
import com.itextpdf.kernel.pdf.PdfString;
import com.itextpdf.kernel.pdf.annot.PdfAnnotation;
import com.itextpdf.kernel.pdf.annot.PdfPopupAnnotation;
import com.itextpdf.kernel.pdf.annot.PdfRedactAnnotation;
import com.itextpdf.kernel.pdf.canvas.CanvasArtifact;
import com.itextpdf.kernel.pdf.canvas.PdfCanvas;
import com.itextpdf.kernel.pdf.xobject.PdfFormXObject;
import com.itextpdf.layout.Canvas;
import com.itextpdf.layout.element.Paragraph;
import com.itextpdf.layout.layout.LayoutArea;
import com.itextpdf.layout.properties.Property;
import com.itextpdf.layout.properties.TextAlignment;
import com.itextpdf.pdfcleanup.actions.event.PdfSweepProductEvent;
import com.itextpdf.pdfcleanup.exceptions.CleanupExceptionMessageConstant;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
/**
* Represents the main mechanism for cleaning a PDF document.
*/
public class PdfCleanUpTool {
private final PdfDocument pdfDocument;
private final CleanUpProperties properties;
/**
* Key - page number, value - list of locations related to the page.
*/
private final Map> pdfCleanUpLocations;
private final FilteredImagesCache filteredImagesCache;
/**
* Keys - redact annotations to be removed from the document after clean up,
* values - list of regions defined by redact annotation.
*/
private Map> redactAnnotations;
/**
 * Creates a {@link PdfCleanUpTool} that uses default {@link CleanUpProperties} and has no regions to erase yet.
 * Register the regions afterwards with {@link PdfCleanUpTool#addCleanupLocation(PdfCleanUpLocation)}.
 *
 * @param pdfDocument A {@link PdfDocument} object representing the document to which redaction applies.
 */
public PdfCleanUpTool(PdfDocument pdfDocument) {
    this(pdfDocument, false, new CleanUpProperties());
}
/**
* Creates a {@link PdfCleanUpTool} object. If {@code cleanRedactAnnotations} is true,
* regions to be erased are extracted from the redact annotations contained inside the given document.
* Those redact annotations will be removed from the resultant document. If {@code cleanRedactAnnotations} is false,
* then no regions for erasing are specified. In that case use {@link PdfCleanUpTool#addCleanupLocation(PdfCleanUpLocation)}
* method to set regions to be erased from the document.
*
* @param pdfDocument A {@link PdfDocument} object representing the document to which redaction applies.
* @param cleanRedactAnnotations if true - regions to be erased are extracted from the redact annotations contained
* @param properties additional properties for clean-up process
* inside the given document.
*/
public PdfCleanUpTool(PdfDocument pdfDocument, boolean cleanRedactAnnotations, CleanUpProperties properties) {
EventManager.getInstance().onEvent(PdfSweepProductEvent.createCleanupPdfEvent(
pdfDocument.getDocumentIdWrapper(), properties.getMetaInfo()));
if (pdfDocument.getReader() == null || pdfDocument.getWriter() == null) {
throw new PdfException(CleanupExceptionMessageConstant.PDF_DOCUMENT_MUST_BE_OPENED_IN_STAMPING_MODE);
}
this.properties = properties;
this.pdfDocument = pdfDocument;
this.pdfCleanUpLocations = new HashMap<>();
this.filteredImagesCache = new FilteredImagesCache();
if (cleanRedactAnnotations) {
addCleanUpLocationsBasedOnRedactAnnotations();
}
}
/**
 * Creates a {@link PdfCleanUpTool} object based on the given {@link java.util.List}
 * of {@link PdfCleanUpLocation}s representing regions to be erased from the document.
 * Redact annotations of the document are not used as a source of regions by this constructor.
 *
 * @param pdfDocument      a {@link PdfDocument} object representing the document to which redaction applies.
 * @param cleanUpLocations list of locations to be cleaned up {@link PdfCleanUpLocation}
 * @param properties       additional properties for clean-up process
 */
public PdfCleanUpTool(PdfDocument pdfDocument, List<PdfCleanUpLocation> cleanUpLocations,
        CleanUpProperties properties) {
    this(pdfDocument, false, properties);
    for (PdfCleanUpLocation location : cleanUpLocations) {
        addCleanupLocation(location);
    }
}
/**
 * Adds a {@link PdfCleanUpLocation} to be cleaned up. Locations are grouped by the page number
 * reported by the location itself.
 *
 * @param cleanUpLocation a {@link PdfCleanUpLocation} to be cleaned up
 *
 * @return this {@link PdfCleanUpTool}
 */
public PdfCleanUpTool addCleanupLocation(PdfCleanUpLocation cleanUpLocation) {
    final int pageNumber = cleanUpLocation.getPage();
    List<PdfCleanUpLocation> pageLocations = this.pdfCleanUpLocations.get(pageNumber);
    if (pageLocations == null) {
        // First location registered for this page: create its bucket.
        pageLocations = new ArrayList<>();
        this.pdfCleanUpLocations.put(pageNumber, pageLocations);
    }
    pageLocations.add(cleanUpLocation);
    return this;
}
/**
* Cleans the document by erasing all the areas which are provided or extracted from redaction annotations.
*
* @throws IOException IOException
*/
public void cleanUp() throws IOException {
for (Map.Entry> entry : pdfCleanUpLocations.entrySet()) {
cleanUpPage(entry.getKey(), entry.getValue());
}
if (redactAnnotations != null) { // if it isn't null, then we are in "extract locations from redact annots" mode
removeRedactAnnots();
}
pdfCleanUpLocations.clear();
}
/**
 * Cleans a page from the document by erasing all the areas which
 * are provided or extracted from redaction annotations.
 * Does nothing when no locations are given for the page.
 *
 * @param pageNumber       the page to be cleaned up
 * @param cleanUpLocations the locations to be cleaned up
 */
private void cleanUpPage(int pageNumber, List<PdfCleanUpLocation> cleanUpLocations) {
    if (cleanUpLocations.isEmpty()) {
        return;
    }

    List<Rectangle> regions = new ArrayList<>(cleanUpLocations.size());
    for (PdfCleanUpLocation cleanUpLocation : cleanUpLocations) {
        regions.add(cleanUpLocation.getRegion());
    }

    PdfPage page = pdfDocument.getPage(pageNumber);
    PdfCleanUpProcessor cleanUpProcessor = new PdfCleanUpProcessor(regions, pdfDocument, this.properties);
    cleanUpProcessor.setFilteredImagesCache(filteredImagesCache);
    cleanUpProcessor.processPageContent(page);
    if (properties.isProcessAnnotations()) {
        // The last argument tells the processor whether we are in redact-annotation mode.
        cleanUpProcessor.processPageAnnotations(page, regions, redactAnnotations != null);
    }

    // Replace the page content and resources with the cleaned versions, then paint the cleaned areas.
    PdfCanvas pageCleanedContents = cleanUpProcessor.popCleanedCanvas();
    page.put(PdfName.Contents, pageCleanedContents.getContentStream());
    page.setResources(pageCleanedContents.getResources());
    colorCleanedLocations(pageCleanedContents, cleanUpLocations);
}
/**
 * Draws colored rectangles on the PdfCanvas corresponding to the PdfCleanUpLocation objects.
 * Locations without a clean-up color are left unpainted.
 *
 * @param canvas           the PdfCanvas on which to draw
 * @param cleanUpLocations the PdfCleanUpLocations
 */
private void colorCleanedLocations(PdfCanvas canvas, List<PdfCleanUpLocation> cleanUpLocations) {
    for (PdfCleanUpLocation location : cleanUpLocations) {
        if (location.getCleanUpColor() != null) {
            addColoredRectangle(canvas, location);
        }
    }
}
/**
 * Fills the region of a PdfCleanUpLocation on the PdfCanvas with the location's clean-up color.
 * In a tagged document the drawing is wrapped into an artifact tag.
 *
 * @param canvas   the PdfCanvas on which to draw
 * @param location the PdfCleanUpLocation
 */
private void addColoredRectangle(PdfCanvas canvas, PdfCleanUpLocation location) {
    if (pdfDocument.isTagged()) {
        canvas.openTag(new CanvasArtifact());
    }

    // To avoid the float calculation precision differences in Java and .Net,
    // every value is floored to a multiple of 0.5 before drawing.
    Rectangle region = location.getRegion();
    float left = (float) (Math.floor(region.getX() * 2.0) / 2.0);
    float bottom = (float) (Math.floor(region.getY() * 2.0) / 2.0);
    float snappedWidth = (float) (Math.floor(region.getWidth() * 2.0) / 2.0);
    float snappedHeight = (float) (Math.floor(region.getHeight() * 2.0) / 2.0);
    Rectangle snapped = new Rectangle(left, bottom, snappedWidth, snappedHeight);

    canvas.saveState();
    canvas.setFillColor(location.getCleanUpColor());
    canvas.rectangle(snapped);
    canvas.fill();
    canvas.restoreState();

    if (pdfDocument.isTagged()) {
        canvas.closeTag();
    }
}
/**
 * Switches the tool into redact-annotation mode: initializes the annotation-to-regions map and
 * registers clean up locations for the redact annotations of every page of the document.
 * Those redact annotations will be removed from the resultant document.
 */
private void addCleanUpLocationsBasedOnRedactAnnotations() {
    // Insertion-ordered map, so annotations are later handled in the order they were found.
    redactAnnotations = new LinkedHashMap<>();
    // Page numbers are 1-based.
    for (int pageNumber = 1; pageNumber <= pdfDocument.getNumberOfPages(); pageNumber++) {
        PdfPage currentPage = pdfDocument.getPage(pageNumber);
        extractLocationsFromRedactAnnotations(currentPage);
    }
}
/**
 * Registers clean up locations for every annotation of the page whose subtype is Redact.
 *
 * @param page the page whose annotations are inspected
 */
private void extractLocationsFromRedactAnnotations(PdfPage page) {
    List<PdfAnnotation> annotations = page.getAnnotations();
    for (PdfAnnotation annotation : annotations) {
        if (PdfName.Redact.equals(annotation.getSubtype())) {
            extractLocationsFromSingleRedactAnnotation((PdfRedactAnnotation) annotation);
        }
    }
}
/**
 * Records the regions of a redact annotation and registers a clean up location for each of them.
 * Note: annotation can consist not only of one area specified by the RECT entry, but also of multiple areas
 * specified by the QuadPoints entry in the annotation dictionary.
 *
 * @param redactAnnotation the redact annotation to extract regions from
 */
private void extractLocationsFromSingleRedactAnnotation(PdfRedactAnnotation redactAnnotation) {
    List<Rectangle> regions;
    PdfArray quadPoints = redactAnnotation.getQuadPoints();
    if (quadPoints != null && !quadPoints.isEmpty()) {
        regions = translateQuadPointsToRectangles(quadPoints);
    } else {
        // No quad points: the annotation rectangle is the single region.
        regions = new ArrayList<>();
        regions.add(redactAnnotation.getRectangle().toRectangle());
    }
    redactAnnotations.put(redactAnnotation, regions);

    int page = pdfDocument.getPageNumber(redactAnnotation.getPage());
    Color cleanUpColor = redactAnnotation.getInteriorColor();
    PdfDictionary ro = redactAnnotation.getRedactRolloverAppearance();
    if (ro != null) {
        // A rollover appearance is present, so the regions are not filled with the interior color.
        cleanUpColor = null;
    }
    for (Rectangle region : regions) {
        addCleanupLocation(new PdfCleanUpLocation(page, region, cleanUpColor));
    }
}
/**
 * Convert a PdfArray of floats into a List of Rectangle objects.
 * The array is read in groups of eight numbers; each group yields one rectangle.
 *
 * @param quadPoints input PdfArray
 * @return one rectangle per group of eight numbers
 */
private List<Rectangle> translateQuadPointsToRectangles(PdfArray quadPoints) {
    List<Rectangle> rectangles = new ArrayList<>();
    for (int i = 0; i < quadPoints.size(); i += 8) {
        // QuadPoints in redact annotations have "Z" order: the third point (i + 4, i + 5) is used
        // as the rectangle origin and the second point (i + 2, i + 3) as the opposite corner.
        float x = quadPoints.getAsNumber(i + 4).floatValue();
        float y = quadPoints.getAsNumber(i + 5).floatValue();
        float width = quadPoints.getAsNumber(i + 2).floatValue() - x;
        float height = quadPoints.getAsNumber(i + 3).floatValue() - y;
        rectangles.add(new Rectangle(x, y, width, height));
    }
    return rectangles;
}
/**
 * Remove the redaction annotations.
 * This method is called after the annotations are processed. For every annotation that is attached
 * to a page, the annotation and its popup are removed from that page and then either the rollover
 * appearance or the overlay text is drawn in its place. Annotations without a page are skipped,
 * because there is no page to remove them from or to draw on.
 *
 * @throws IOException signals that an I/O exception has occurred during redaction.
 */
private void removeRedactAnnots() throws IOException {
    for (Map.Entry<PdfRedactAnnotation, List<Rectangle>> entry : redactAnnotations.entrySet()) {
        PdfRedactAnnotation annotation = entry.getKey();
        PdfPage page = annotation.getPage();
        if (page == null) {
            // Creating a canvas for a null page would fail, so there is nothing more to do here.
            continue;
        }

        page.removeAnnotation(annotation);
        PdfPopupAnnotation popup = annotation.getPopup();
        if (popup != null) {
            page.removeAnnotation(popup);
        }

        PdfCanvas canvas = new PdfCanvas(page);
        PdfStream redactRolloverAppearance = annotation.getRedactRolloverAppearance();
        PdfString overlayText = annotation.getOverlayText();
        Rectangle annotRect = annotation.getRectangle().toRectangle();

        // The rollover appearance takes precedence over the overlay text.
        if (redactRolloverAppearance != null) {
            drawRolloverAppearance(canvas, redactRolloverAppearance, annotRect, entry.getValue());
        } else if (overlayText != null && !overlayText.toUnicodeString().isEmpty()) {
            drawOverlayText(canvas, overlayText.toUnicodeString(), annotRect, annotation.getRepeat(),
                    annotation.getDefaultAppearance(), annotation.getJustification());
        }
    }
}
/**
 * Draws the rollover appearance of a redact annotation, clipped to the cleaned regions.
 * In a tagged document the drawing is wrapped into an artifact tag.
 *
 * @param canvas                   the canvas to draw on
 * @param redactRolloverAppearance the appearance stream, drawn as a form XObject
 * @param annotRect                the annotation rectangle; its left-bottom corner is the drawing offset
 * @param cleanedRegions           the regions forming the clipping path
 */
private void drawRolloverAppearance(PdfCanvas canvas, PdfStream redactRolloverAppearance, Rectangle annotRect,
        List<Rectangle> cleanedRegions) {
    if (pdfDocument.isTagged()) {
        canvas.openTag(new CanvasArtifact());
    }

    canvas.saveState();
    // Build one clipping path out of all cleaned regions.
    for (Rectangle rect : cleanedRegions) {
        canvas.rectangle(rect.getLeft(), rect.getBottom(), rect.getWidth(), rect.getHeight());
    }
    canvas.clip().endPath();

    PdfFormXObject formXObject = new PdfFormXObject(redactRolloverAppearance);
    // Identity scale/rotation, translated to the annotation's left-bottom corner.
    canvas.addXObjectWithTransformationMatrix(formXObject, 1, 0, 0, 1, annotRect.getLeft(), annotRect.getBottom());
    canvas.restoreState();

    if (pdfDocument.isTagged()) {
        canvas.closeTag();
    }
}
/**
 * Draws the overlay text of a redact annotation inside the annotation rectangle.
 * Font, font size and colors are taken from the parsed default appearance string; the font is looked up
 * in the AcroForm only when both a {@code Tf} operator and an AcroForm dictionary exist, otherwise the
 * default font with size 12 is used. In a tagged document the drawing is wrapped into an artifact tag.
 *
 * @param canvas            the canvas to draw on
 * @param overlayText       the text to draw
 * @param annotRect         the rectangle the text is laid out in
 * @param repeat            if set and true, the text is appended repeatedly until the area is full
 * @param defaultAppearance the default appearance (DA) string of the annotation
 * @param justification     1 - centered, 2 - right aligned, any other value - left aligned
 * @throws IOException  declared by default appearance parsing and default font creation
 * @throws PdfException if {@code defaultAppearance} is {@code null}
 */
private void drawOverlayText(PdfCanvas canvas, String overlayText, Rectangle annotRect, PdfBoolean repeat,
        PdfString defaultAppearance, int justification) throws IOException {
    // Explicit check instead of catching NullPointerException around the parser call.
    if (defaultAppearance == null) {
        throw new PdfException(CleanupExceptionMessageConstant.DEFAULT_APPEARANCE_NOT_FOUND);
    }
    Map<String, List<Object>> parsedDA = parseDAParam(defaultAppearance);

    PdfFont font;
    float fontSize = 12;
    List<Object> fontArgs = parsedDA.get("Tf");
    PdfDictionary formDictionary = pdfDocument.getCatalog().getPdfObject().getAsDictionary(PdfName.AcroForm);
    if (fontArgs != null && formDictionary != null) {
        // Tf operands: font resource name followed by font size.
        font = getFontFromAcroForm((PdfName) fontArgs.get(0));
        fontSize = ((PdfNumber) fontArgs.get(1)).floatValue();
    } else {
        font = PdfFontFactory.createFont();
    }

    if (pdfDocument.isTagged()) {
        canvas.openTag(new CanvasArtifact());
    }

    Canvas modelCanvas = new Canvas(canvas, annotRect, false);
    Paragraph p = new Paragraph(overlayText).setFont(font).setFontSize(fontSize).setMargin(0);

    TextAlignment textAlignment = TextAlignment.LEFT;
    switch (justification) {
        case 1:
            textAlignment = TextAlignment.CENTER;
            break;
        case 2:
            textAlignment = TextAlignment.RIGHT;
            break;
        default:
    }
    p.setTextAlignment(textAlignment);

    List<Object> strokeColorArgs = parsedDA.get("StrokeColor");
    if (strokeColorArgs != null) {
        p.setStrokeColor(getColor(strokeColorArgs));
    }
    List<Object> fillColorArgs = parsedDA.get("FillColor");
    if (fillColorArgs != null) {
        p.setFontColor(getColor(fillColorArgs));
    }

    modelCanvas.add(p);
    if (repeat != null && repeat.getValue()) {
        boolean isFull = isCanvasFull(modelCanvas);
        while (!isFull) {
            p.add(overlayText);
            LayoutArea previousArea = modelCanvas.getRenderer().getCurrentArea().clone();
            modelCanvas.relayout();
            if (modelCanvas.getRenderer().getCurrentArea().equals(previousArea)) {
                // Avoid infinite loop. This might be caused by the fact that the font does not support
                // the text we want to show
                break;
            }
            isFull = isCanvasFull(modelCanvas);
        }
    }
    modelCanvas.getRenderer().flush();

    if (pdfDocument.isTagged()) {
        canvas.closeTag();
    }
}

/** Tells whether the root renderer of the canvas has the FULL property and it is set to true. */
private static boolean isCanvasFull(Canvas modelCanvas) {
    return modelCanvas.getRenderer().hasProperty(Property.FULL)
            && (boolean) modelCanvas.getRenderer().getPropertyAsBoolean(Property.FULL);
}
/**
 * Parses a default appearance (DA) string into a map from operator to the operands preceding it.
 * The color operators {@code RG}, {@code G} and {@code K} are stored under the key {@code "StrokeColor"},
 * {@code rg}, {@code g} and {@code k} under {@code "FillColor"}; every other operator keeps its own name.
 * Numbers are stored as {@link PdfNumber}, names as {@link PdfName}, all other operands as plain strings.
 * When a key occurs more than once, the last occurrence wins.
 *
 * @param DA the default appearance string, must not be {@code null}
 * @return operands keyed by operator
 * @throws IOException declared by the tokenizer
 */
private Map<String, List<Object>> parseDAParam(PdfString DA) throws IOException {
    Map<String, List<Object>> commandArguments = new HashMap<>();
    PdfTokenizer tokeniser = new PdfTokenizer(
            new RandomAccessFileOrArray(
                    new RandomAccessSourceFactory().createSource(
                            DA.toUnicodeString().getBytes(StandardCharsets.UTF_8))));

    List<Object> currentArguments = new ArrayList<>();
    while (tokeniser.nextToken()) {
        switch (tokeniser.getTokenType()) {
            case Other: {
                // An operator closes the current operand list.
                String key = tokeniser.getStringValue();
                if ("RG".equals(key) || "G".equals(key) || "K".equals(key)) {
                    key = "StrokeColor";
                } else if ("rg".equals(key) || "g".equals(key) || "k".equals(key)) {
                    key = "FillColor";
                }
                commandArguments.put(key, currentArguments);
                currentArguments = new ArrayList<>();
                break;
            }
            case Number:
                // Float.parseFloat replaces the deprecated Float(String) constructor.
                currentArguments.add(new PdfNumber(Float.parseFloat(tokeniser.getStringValue())));
                break;
            case Name:
                currentArguments.add(new PdfName(tokeniser.getStringValue()));
                break;
            default:
                currentArguments.add(tokeniser.getStringValue());
                break;
        }
    }
    return commandArguments;
}
/**
 * Creates a font from the font dictionary registered under the given name in the
 * default resources (DR) of the document's AcroForm.
 *
 * @param fontName resource name of the font
 * @return the created font
 */
private PdfFont getFontFromAcroForm(PdfName fontName) {
    // Walk Catalog -> AcroForm -> DR -> Font to reach the font resource dictionary.
    PdfDictionary acroFormFonts = pdfDocument.getCatalog().getPdfObject()
            .getAsDictionary(PdfName.AcroForm)
            .getAsDictionary(PdfName.DR)
            .getAsDictionary(PdfName.Font);
    PdfDictionary fontDictionary = acroFormFonts.getAsDictionary(fontName);
    return PdfFontFactory.createFont(fontDictionary);
}
/**
 * Builds a color from color operator operands: one component gives a gray color,
 * three components an RGB color and four components a CMYK color.
 *
 * @param colorArgs the color components; each element is expected to be a {@link PdfNumber}
 * @return the color, or {@code null} when the number of components is not 1, 3 or 4
 */
private Color getColor(List<?> colorArgs) {
    Color color = null;
    switch (colorArgs.size()) {
        case 1:
            color = new DeviceGray(((PdfNumber) colorArgs.get(0)).floatValue());
            break;
        case 3:
            color = new DeviceRgb(((PdfNumber) colorArgs.get(0)).floatValue(),
                    ((PdfNumber) colorArgs.get(1)).floatValue(),
                    ((PdfNumber) colorArgs.get(2)).floatValue());
            break;
        case 4:
            color = new DeviceCmyk(((PdfNumber) colorArgs.get(0)).floatValue(),
                    ((PdfNumber) colorArgs.get(1)).floatValue(),
                    ((PdfNumber) colorArgs.get(2)).floatValue(),
                    ((PdfNumber) colorArgs.get(3)).floatValue());
            break;
        default:
            // Unsupported number of components: no color.
            break;
    }
    return color;
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy