com.itextpdf.text.pdf.pdfcleanup.PdfCleanUpProcessor Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of itext-xtra Show documentation
Show all versions of itext-xtra Show documentation
iText Xtra, part of iText a Free Java-PDF library
The newest version!
/*
*
* This file is part of the iText (R) project.
Copyright (c) 1998-2022 iText Group NV
* Authors: Bruno Lowagie, Paulo Soares, et al.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License version 3
* as published by the Free Software Foundation with the addition of the
* following permission added to Section 15 as permitted in Section 7(a):
* FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
* ITEXT GROUP. ITEXT GROUP DISCLAIMS THE WARRANTY OF NON INFRINGEMENT
* OF THIRD PARTY RIGHTS
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU Affero General Public License for more details.
* You should have received a copy of the GNU Affero General Public License
* along with this program; if not, see http://www.gnu.org/licenses or write to
* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA, 02110-1301 USA, or download the license from the following URL:
* http://itextpdf.com/terms-of-use/
*
* The interactive user interfaces in modified source and object code versions
* of this program must display Appropriate Legal Notices, as required under
* Section 5 of the GNU Affero General Public License.
*
* In accordance with Section 7(b) of the GNU Affero General Public License,
* a covered work must retain the producer line in every PDF that is created
* or manipulated using iText.
*
* You can be released from the requirements of the license by purchasing
* a commercial license. Buying such a license is mandatory as soon as you
* develop commercial activities involving the iText software without
* disclosing the source code of your own applications.
* These activities include: offering paid services to customers as an ASP,
* serving PDFs on the fly in a web application, shipping iText with a closed
* source product.
*
* For more information, please contact iText Software Corp. at this
* address: sales@itextpdf.com
*/
package com.itextpdf.text.pdf.pdfcleanup;
import com.itextpdf.text.*;
import com.itextpdf.text.io.RandomAccessSourceFactory;
import com.itextpdf.text.pdf.*;
import com.itextpdf.text.pdf.parser.ContentByteUtils;
import com.itextpdf.text.pdf.parser.PdfContentStreamProcessor;
import java.io.IOException;
import java.util.*;
import java.util.List;
/**
* Represents the main mechanism for cleaning a PDF document.
*
* @since 5.5.4
*/
public class PdfCleanUpProcessor {
/**
* When a document with line arts is being cleaned up, there are lot of
* calculations with floating point numbers. All of them are translated
* into fixed point numbers by multiplying by this coefficient. Vary it
* to adjust the preciseness of the calculations.
*/
public static double floatMultiplier = Math.pow(10, 14);
public static boolean fillCleanedArea = true;
/**
* Used as the criterion of a good approximation of rounded line joins
* and line caps.
*/
public static double arcTolerance = 0.0025;
private static final String XOBJ_NAME_PREFIX = "Fm";
private static final String STROKE_COLOR = "StrokeColor";
private static final String FILL_COLOR = "FillColor";
private int currentXObjNum = 0;
private PdfStamper pdfStamper;
// key - page number, value - list of locations related to the page
private Map> pdfCleanUpLocations;
// key - number of page containing redact annotations, value - look at variable name
private Map> redactAnnotIndirRefs;
// stores list of rectangles for annotation identified by it's index in Annots array
private Map> clippingRects;
/**
 * Creates a {@link com.itextpdf.text.pdf.pdfcleanup.PdfCleanUpProcessor} object based on the
 * given {@link java.util.List} of {@link com.itextpdf.text.pdf.pdfcleanup.PdfCleanUpLocation}s
 * representing regions to be erased from the document. The locations are grouped by
 * page number at construction time.
 *
 * @param pdfCleanUpLocations list of locations to be cleaned up; see {@link PdfCleanUpLocation}
 * @param pdfStamper          a {@link com.itextpdf.text.pdf.PdfStamper} object representing the
 *                            document which redaction applies to
 */
public PdfCleanUpProcessor(List<PdfCleanUpLocation> pdfCleanUpLocations, PdfStamper pdfStamper) {
    this.pdfCleanUpLocations = organizeLocationsByPage(pdfCleanUpLocations);
    this.pdfStamper = pdfStamper;
}
/**
 * Creates a {@link com.itextpdf.text.pdf.pdfcleanup.PdfCleanUpProcessor} object. Regions to be erased from
 * the document are extracted from the redact annotations contained inside the given document.
 * Extraction happens immediately, for every page of the document.
 *
 * @param pdfStamper a {@link com.itextpdf.text.pdf.PdfStamper} object representing the document
 *                   which redaction applies to
 */
public PdfCleanUpProcessor(PdfStamper pdfStamper) {
    this.redactAnnotIndirRefs = new HashMap<Integer, Set<String>>();
    this.clippingRects = new HashMap<Integer, List<Rectangle>>();
    // the stamper must be assigned before extraction: extraction reads pages through it
    this.pdfStamper = pdfStamper;
    extractLocationsFromRedactAnnots();
}
/**
* Cleans the document by erasing all the areas which are either provided or
* extracted from redaction annotations.
*
* @throws IOException
* @throws DocumentException
*/
public void cleanUp() throws IOException, DocumentException {
for (Map.Entry> entry : pdfCleanUpLocations.entrySet()) {
cleanUpPage(entry.getKey(), entry.getValue());
}
pdfStamper.getReader().removeUnusedObjects();
}
/**
 * Cleans a single page: the original page content is read, detached from the page and
 * run through a content stream processor driven by a region filter built from the given
 * locations. Afterwards the cleaned locations are optionally coloured and, when the
 * processor works from redact annotations, those annotations are removed.
 *
 * @param pageNum          number of the page to clean
 * @param cleanUpLocations locations on that page; nothing happens when the list is empty
 * @throws IOException       propagated from reading or processing the page content
 * @throws DocumentException propagated from removing redact annotations
 */
private void cleanUpPage(int pageNum, List<PdfCleanUpLocation> cleanUpLocations) throws IOException, DocumentException {
    if (cleanUpLocations.isEmpty()) {
        return;
    }

    PdfReader pdfReader = pdfStamper.getReader();
    PdfDictionary page = pdfReader.getPageN(pageNum);
    PdfContentByte canvas = pdfStamper.getUnderContent(pageNum);

    // read the original content first, then detach it from the page dictionary
    byte[] pageContentInput = ContentByteUtils.getContentBytesForPage(pdfReader, pageNum);
    page.remove(PdfName.CONTENTS);

    canvas.saveState();

    PdfCleanUpRegionFilter filter = createFilter(cleanUpLocations);
    PdfCleanUpRenderListener pdfCleanUpRenderListener = new PdfCleanUpRenderListener(pdfStamper, filter);
    pdfCleanUpRenderListener.registerNewContext(pdfReader.getPageResources(page), canvas);

    PdfContentStreamProcessor contentProcessor = new PdfContentStreamProcessor(pdfCleanUpRenderListener);
    PdfCleanUpContentOperator.populateOperators(contentProcessor, pdfCleanUpRenderListener);
    contentProcessor.processContent(pageContentInput, page.getAsDict(PdfName.RESOURCES));
    pdfCleanUpRenderListener.popContext();

    canvas.restoreState();

    colorCleanedLocations(canvas, cleanUpLocations);

    if (redactAnnotIndirRefs != null) { // if it isn't null, then we are in "extract locations from redact annots" mode
        deleteRedactAnnots(pageNum);
    }
}
/**
 * Builds a region filter covering the regions of all given locations.
 *
 * @param cleanUpLocations locations whose regions the filter should cover
 * @return a filter constructed from the collected regions
 */
private PdfCleanUpRegionFilter createFilter(List<PdfCleanUpLocation> cleanUpLocations) {
    List<Rectangle> regions = new ArrayList<Rectangle>(cleanUpLocations.size());
    for (PdfCleanUpLocation location : cleanUpLocations) {
        regions.add(location.getRegion());
    }
    return new PdfCleanUpRegionFilter(regions);
}
/**
 * Paints every location that has a clean-up colour. Does nothing when
 * {@link #fillCleanedArea} is {@code false}; locations without a colour are skipped.
 *
 * @param canvas           canvas to paint on
 * @param cleanUpLocations locations that were cleaned on the page
 */
private void colorCleanedLocations(PdfContentByte canvas, List<PdfCleanUpLocation> cleanUpLocations) {
    if (!fillCleanedArea) {
        return;
    }
    for (PdfCleanUpLocation location : cleanUpLocations) {
        if (location.getCleanUpColor() != null) {
            addColoredRectangle(canvas, location);
        }
    }
}
/**
 * Fills the region of the given location with the location's clean-up colour.
 * The graphics state is saved before and restored after painting.
 *
 * @param canvas          canvas to paint on
 * @param cleanUpLocation location providing both the region and the fill colour
 */
private void addColoredRectangle(PdfContentByte canvas, PdfCleanUpLocation cleanUpLocation) {
    final Rectangle area = cleanUpLocation.getRegion();

    canvas.saveState();
    canvas.setColorFill(cleanUpLocation.getCleanUpColor());

    // outline the region corner by corner, starting at the lower-left one
    final float left = area.getLeft();
    final float bottom = area.getBottom();
    final float right = area.getRight();
    final float top = area.getTop();
    canvas.moveTo(left, bottom);
    canvas.lineTo(right, bottom);
    canvas.lineTo(right, top);
    canvas.lineTo(left, top);
    canvas.closePath();

    canvas.fill();
    canvas.restoreState();
}
private Map> organizeLocationsByPage(Collection pdfCleanUpLocations) {
Map> organizedLocations = new HashMap>();
for (PdfCleanUpLocation location : pdfCleanUpLocations) {
Integer page = location.getPage();
if (!organizedLocations.containsKey(page)) {
organizedLocations.put(page, new ArrayList());
}
organizedLocations.get(page).add(location);
}
return organizedLocations;
}
/**
* Extracts locations from the redact annotations contained in the document.
*/
private void extractLocationsFromRedactAnnots() {
this.pdfCleanUpLocations = new HashMap>();
PdfReader reader = pdfStamper.getReader();
for (int i = 1; i <= reader.getNumberOfPages(); ++i) {
PdfDictionary pageDict = reader.getPageN(i);
this.pdfCleanUpLocations.put(i, extractLocationsFromRedactAnnots(i, pageDict));
}
}
/**
 * Extracts locations from the redact annotations contained in the document and applied to the given page.
 * Entries of the Annots array that are not dictionaries, or whose Subtype is missing or
 * different from Redact, are ignored.
 *
 * @param page     number of the page
 * @param pageDict dictionary of that page
 * @return locations of all redact annotations found on the page; empty when there are none
 */
private List<PdfCleanUpLocation> extractLocationsFromRedactAnnots(int page, PdfDictionary pageDict) {
    List<PdfCleanUpLocation> locations = new ArrayList<PdfCleanUpLocation>();

    PdfArray annotsArray = pageDict.getAsArray(PdfName.ANNOTS);
    if (annotsArray == null) {
        return locations;
    }

    for (int i = 0; i < annotsArray.size(); ++i) {
        PdfDictionary annotDict = annotsArray.getAsDict(i);
        // constant-first comparison tolerates a missing Subtype entry
        if (annotDict == null || !PdfName.REDACT.equals(annotDict.getAsName(PdfName.SUBTYPE))) {
            continue;
        }

        PdfIndirectReference annotIndirRef = annotsArray.getAsIndirectObject(i);
        if (annotIndirRef != null) {
            saveRedactAnnotIndirRef(page, annotIndirRef.toString());
        }
        // an annotation stored directly in the array has no reference to remember;
        // its area is still cleaned
        locations.addAll(extractLocationsFromRedactAnnot(page, i, annotDict));
    }

    return locations;
}
/**
 * Remembers the indirect reference (in string form) of a redact annotation found on the given page.
 *
 * @param page      number of the page holding the annotation
 * @param indRefStr string form of the annotation's indirect reference
 */
private void saveRedactAnnotIndirRef(int page, String indRefStr) {
    Set<String> pageRefs = redactAnnotIndirRefs.get(page);
    if (pageRefs == null) {
        pageRefs = new HashSet<String>();
        redactAnnotIndirRefs.put(page, pageRefs);
    }
    pageRefs.add(indRefStr);
}
/**
 * Extracts locations from the concrete annotation.
 * Note: annotation can consist not only of one area specified by the RECT entry, but also of multiple areas specified
 * by the QuadPoints entry in the annotation dictionary. The RECT entry is used when QuadPoints
 * is absent, empty, or yields no rectangle.
 *
 * The rectangles found are also stored in {@code clippingRects} under the annotation's index.
 * The clean-up colour is taken from the IC entry; it is dropped when the annotation has an RO stream.
 *
 * @param page       number of the page holding the annotation
 * @param annotIndex index of the annotation in the page's Annots array
 * @param annotDict  dictionary of the redact annotation
 * @return one location per marked rectangle
 */
private List<PdfCleanUpLocation> extractLocationsFromRedactAnnot(int page, int annotIndex, PdfDictionary annotDict) {
    List<PdfCleanUpLocation> locations = new ArrayList<PdfCleanUpLocation>();
    List<Rectangle> markedRectangles = new ArrayList<Rectangle>();

    // QuadPoints is optional, so it must be null-checked before use
    PdfArray quadPoints = annotDict.getAsArray(PdfName.QUADPOINTS);
    if (quadPoints != null && quadPoints.size() != 0) {
        markedRectangles.addAll(translateQuadPointsToRectangles(quadPoints));
    }
    if (markedRectangles.isEmpty()) {
        PdfArray annotRect = annotDict.getAsArray(PdfName.RECT);
        markedRectangles.add(new Rectangle(annotRect.getAsNumber(0).floatValue(),
                annotRect.getAsNumber(1).floatValue(),
                annotRect.getAsNumber(2).floatValue(),
                annotRect.getAsNumber(3).floatValue()));
    }

    clippingRects.put(annotIndex, markedRectangles);

    BaseColor cleanUpColor = null;
    PdfArray ic = annotDict.getAsArray(PdfName.IC);
    if (ic != null) {
        cleanUpColor = new BaseColor(
                ic.getAsNumber(0).floatValue(),
                ic.getAsNumber(1).floatValue(),
                ic.getAsNumber(2).floatValue()
        );
    }

    // when an RO stream is present no fill colour is used for the locations
    PdfStream ro = annotDict.getAsStream(PdfName.RO);
    if (ro != null) {
        cleanUpColor = null;
    }

    for (Rectangle rect : markedRectangles) {
        locations.add(new PdfCleanUpLocation(page, rect, cleanUpColor));
    }

    return locations;
}
/**
 * Converts a QuadPoints array into rectangles, one per group of eight numbers.
 * Each rectangle is built from the third point (indices 4, 5) and the second point
 * (indices 2, 3) of the group. A trailing group with fewer than eight numbers is ignored.
 *
 * @param quadPoints the QuadPoints array of an annotation
 * @return rectangles for all complete quadrilaterals; empty when there is none
 */
private List<Rectangle> translateQuadPointsToRectangles(PdfArray quadPoints) {
    List<Rectangle> rectangles = new ArrayList<Rectangle>();

    // only complete groups are read, so a malformed length cannot overrun the array
    for (int i = 0; i + 8 <= quadPoints.size(); i += 8) {
        rectangles.add(new Rectangle(quadPoints.getAsNumber(i + 4).floatValue(), // QuadPoints have "Z" order
                quadPoints.getAsNumber(i + 5).floatValue(),
                quadPoints.getAsNumber(i + 2).floatValue(),
                quadPoints.getAsNumber(i + 3).floatValue()));
    }

    return rectangles;
}
/**
 * Deletes redact annotations from the page and substitutes them with either OverlayText or RO object if it's needed.
 * An annotation is removed when its own indirect reference, or the reference of its parent,
 * was recorded as a redact annotation of this page. The Annots entry is dropped from the page
 * once the array becomes empty.
 *
 * @param pageNum number of the page to process
 * @throws IOException       propagated from inserting the RO form XObject or drawing overlay text
 * @throws DocumentException propagated from drawing overlay text
 */
private void deleteRedactAnnots(int pageNum) throws IOException, DocumentException {
    Set<String> indirRefs = redactAnnotIndirRefs.get(pageNum);

    if (indirRefs == null || indirRefs.isEmpty()) {
        return;
    }

    PdfReader reader = pdfStamper.getReader();
    PdfContentByte canvas = pdfStamper.getOverContent(pageNum);
    PdfDictionary pageDict = reader.getPageN(pageNum);
    PdfArray annotsArray = pageDict.getAsArray(PdfName.ANNOTS);
    if (annotsArray == null) {
        return;
    }

    // j is for access annotRect (i can be decreased, so we need to store additional index,
    // indicating current position in ANNOTS array in case if we don't remove anything
    // NOTE(review): clippingRects is keyed by this index only, not by page - confirm that
    // rectangles recorded for another page cannot be picked up here.
    for (int i = 0, j = 0; i < annotsArray.size(); ++i, ++j) {
        PdfDictionary annotDict = annotsArray.getAsDict(i);
        if (annotDict == null) {
            continue; // not a dictionary: nothing to inspect or remove
        }

        // entries stored directly in the array have no indirect reference of their own
        PdfIndirectReference annotIndRef = annotsArray.getAsIndirectObject(i);
        boolean isRecordedRedact = annotIndRef != null && indirRefs.contains(annotIndRef.toString());
        if (!isRecordedRedact && !indirRefs.contains(getParentIndRefStr(annotDict))) {
            continue;
        }

        PdfStream formXObj = annotDict.getAsStream(PdfName.RO);
        PdfString overlayText = annotDict.getAsString(PdfName.OVERLAYTEXT);

        if (fillCleanedArea && formXObj != null) {
            PdfArray rectArray = annotDict.getAsArray(PdfName.RECT);
            Rectangle annotRect = new Rectangle(rectArray.getAsNumber(0).floatValue(),
                    rectArray.getAsNumber(1).floatValue(),
                    rectArray.getAsNumber(2).floatValue(),
                    rectArray.getAsNumber(3).floatValue());

            insertFormXObj(canvas, pageDict, formXObj, clippingRects.get(j), annotRect);
        } else if (fillCleanedArea && overlayText != null && overlayText.toUnicodeString().length() > 0) {
            drawOverlayText(canvas, clippingRects.get(j), overlayText,
                    annotDict.getAsString(PdfName.DA),
                    annotDict.getAsNumber(PdfName.Q),
                    annotDict.getAsBoolean(PdfName.REPEAT));
        }

        annotsArray.remove(i--); // array size is changed, so we need to decrease i
    }

    if (annotsArray.size() == 0) {
        pageDict.remove(PdfName.ANNOTS);
    }
}
/**
 * Draws the given form XObject on the canvas, clipped to the given rectangles and
 * positioned at the lower-left corner of the annotation rectangle.
 *
 * @param canvas        canvas to draw on
 * @param pageDict      page dictionary, used to pick an XObject name
 * @param formXObj      form XObject stream to draw
 * @param clippingRects rectangles that together form the clipping path
 * @param annotRect     rectangle of the annotation the XObject belongs to
 * @throws IOException propagated from adding the form XObject to the canvas
 */
private void insertFormXObj(PdfContentByte canvas, PdfDictionary pageDict, PdfStream formXObj, List<Rectangle> clippingRects, Rectangle annotRect) throws IOException {
    PdfName xobjName = generateNameForXObj(pageDict);

    canvas.saveState();

    for (Rectangle rect : clippingRects) {
        canvas.rectangle(rect.getLeft(), rect.getBottom(), rect.getWidth(), rect.getHeight());
    }

    // the accumulated rectangles become the clipping path; newPath ends the path without painting it
    canvas.clip();
    canvas.newPath();

    canvas.addFormXObj(formXObj, xobjName, 1, 0, 0, 1, annotRect.getLeft(), annotRect.getBottom());

    canvas.restoreState();
}
/**
 * Draws the overlay text of a redact annotation into each of the given rectangles.
 * Stroke colour, fill colour and font are taken from the default appearance string when it
 * provides them; otherwise the canvas colours are left alone and a default-font phrase is used.
 *
 * @param canvas         canvas to draw on
 * @param textRectangles rectangles to fill with text
 * @param overlayText    text to draw
 * @param otDA           default appearance string; may be {@code null}
 * @param otQ            alignment value passed to the column; may be {@code null}
 * @param otRepeat       whether the text is repeated to fill each rectangle; may be {@code null}
 * @throws DocumentException propagated from laying out the column text
 * @throws IOException       propagated from parsing the default appearance string
 */
private void drawOverlayText(PdfContentByte canvas, List<Rectangle> textRectangles, PdfString overlayText,
                             PdfString otDA, PdfNumber otQ, PdfBoolean otRepeat) throws DocumentException, IOException {
    ColumnText ct = new ColumnText(canvas);
    ct.setLeading(0, 1.2F);
    ct.setUseAscender(true);

    String otStr = overlayText.toUnicodeString();

    canvas.saveState();

    // a missing DA entry simply means there is nothing to apply
    Map<String, List> parsedDA = otDA != null ? parseDAParam(otDA) : new HashMap<String, List>();

    Font font = null;

    if (parsedDA.containsKey(STROKE_COLOR)) {
        setStrokeColor(canvas, parsedDA.get(STROKE_COLOR));
    }

    if (parsedDA.containsKey(FILL_COLOR)) {
        setFillColor(canvas, parsedDA.get(FILL_COLOR));
    }

    // Tf needs a font name and a size; anything else falls back to the default font
    List tfArgs = parsedDA.get("Tf");
    if (tfArgs != null && tfArgs.size() >= 2
            && tfArgs.get(0) instanceof PdfName && tfArgs.get(1) instanceof PdfNumber) {
        font = retrieveFontFromAcroForm((PdfName) tfArgs.get(0), (PdfNumber) tfArgs.get(1));
    }

    for (Rectangle textRect : textRectangles) {
        ct.setSimpleColumn(textRect);

        if (otQ != null) {
            ct.setAlignment(otQ.intValue());
        }

        Phrase otPhrase;

        if (font != null) {
            otPhrase = new Phrase(otStr, font);
        } else {
            otPhrase = new Phrase(otStr);
        }

        float y = ct.getYLine();

        if (otRepeat != null && otRepeat.booleanValue()) {
            // simulate the layout, appending the text until the column reports leftover text
            int status = ct.go(true);

            while (!ColumnText.hasMoreText(status)) {
                otPhrase.add(otStr);
                ct.setText(otPhrase);
                ct.setYLine(y);
                status = ct.go(true);
            }
        }

        ct.setText(otPhrase);
        ct.setYLine(y);
        ct.go();
    }

    canvas.restoreState();
}
/**
 * Looks up a font by name in the font dictionary of the AcroForm's default resources (DR).
 *
 * @param fontName resource name of the font
 * @param size     font size to apply
 * @return the font at the requested size, or {@code null} when the document has no AcroForm,
 *         no DR or Font dictionary, or no indirect font entry under the given name
 */
private Font retrieveFontFromAcroForm(PdfName fontName, PdfNumber size) {
    // every step of the lookup is optional in a PDF, so each one is checked
    PdfDictionary acroForm = pdfStamper.getReader().getAcroForm();
    PdfDictionary defaultResources = acroForm != null ? acroForm.getAsDict(PdfName.DR) : null;
    PdfDictionary fonts = defaultResources != null ? defaultResources.getAsDict(PdfName.FONT) : null;
    PdfIndirectReference fontIndirReference = fonts != null ? fonts.getAsIndirectObject(fontName) : null;

    if (!(fontIndirReference instanceof PRIndirectReference)) {
        return null;
    }

    BaseFont bfont = BaseFont.createFont((PRIndirectReference) fontIndirReference);
    return new Font(bfont, size.floatValue());
}
/**
 * Parses a default appearance (DA) string into a map from operator to its operands.
 * The colour operators {@code RG}, {@code G}, {@code K} are stored under the stroke colour key
 * and {@code rg}, {@code g}, {@code k} under the fill colour key; every other operator is stored
 * under its own name. Number operands become {@link PdfNumber}s, name operands become
 * {@link PdfName}s, and any other operand is kept as its string value. A later occurrence of
 * the same key replaces an earlier one.
 *
 * @param DA default appearance string; may be {@code null}
 * @return map from key to operand list; empty when {@code DA} is {@code null}
 * @throws IOException propagated from the tokeniser
 */
Map<String, List> parseDAParam(PdfString DA) throws IOException {
    Map<String, List> commandArguments = new HashMap<String, List>();
    if (DA == null) {
        return commandArguments;
    }

    PRTokeniser tokeniser = new PRTokeniser(new RandomAccessFileOrArray(new RandomAccessSourceFactory().createSource(DA.getBytes())));
    try {
        List<Object> currentArguments = new ArrayList<Object>();

        while (tokeniser.nextToken()) {
            if (tokeniser.getTokenType() == PRTokeniser.TokenType.OTHER) {
                // an operator closes the operand list collected so far
                String key = tokeniser.getStringValue();

                if (key.equals("RG") || key.equals("G") || key.equals("K")) {
                    key = STROKE_COLOR;
                } else if (key.equals("rg") || key.equals("g") || key.equals("k")) {
                    key = FILL_COLOR;
                }

                commandArguments.put(key, currentArguments);
                currentArguments = new ArrayList<Object>();
            } else {
                switch (tokeniser.getTokenType()) {
                    case NUMBER:
                        currentArguments.add(new PdfNumber(tokeniser.getStringValue()));
                        break;

                    case NAME:
                        currentArguments.add(new PdfName(tokeniser.getStringValue()));
                        break;

                    default:
                        currentArguments.add(tokeniser.getStringValue());
                }
            }
        }
    } finally {
        tokeniser.close();
    }

    return commandArguments;
}
/**
 * Returns the string form of the indirect reference stored under the Parent key of the dictionary.
 *
 * @param dict dictionary to inspect
 * @return the reference as a string, or {@code null} when the dictionary has no indirect Parent entry
 */
private String getParentIndRefStr(PdfDictionary dict) {
    PdfIndirectReference parentRef = dict.getAsIndirectObject(PdfName.PARENT);
    // annotations without a parent are common; report "no parent" instead of failing
    return parentRef != null ? parentRef.toString() : null;
}
/**
 * Generates a name of the form {@code Fm<number>} for a new form XObject. The number is
 * greater than the number found in any existing XObject name of the page, and the internal
 * counter advances on every call.
 *
 * @param pageDict page dictionary whose XObject resources are inspected
 * @return the generated name
 */
private PdfName generateNameForXObj(PdfDictionary pageDict) {
    // a page dictionary may lack a Resources entry, or the entry may have no XObject dictionary
    PdfDictionary resourcesDict = pageDict.getAsDict(PdfName.RESOURCES);
    PdfDictionary xobjDict = resourcesDict != null ? resourcesDict.getAsDict(PdfName.XOBJECT) : null;

    if (xobjDict != null) {
        for (PdfName xobjName : xobjDict.getKeys()) {
            int xobjNum = getXObjNum(xobjName);

            if (currentXObjNum <= xobjNum) {
                currentXObjNum = xobjNum + 1;
            }
        }
    }

    return new PdfName(XOBJ_NAME_PREFIX + currentXObjNum++);
}
/**
 * Extracts the number that follows the last occurrence of the {@code Fm} prefix in an XObject name.
 *
 * @param xobjName XObject resource name
 * @return the parsed number, or 0 when the name does not contain the prefix or the text after
 *         the prefix is not a valid integer
 */
private int getXObjNum(PdfName xobjName) {
    String decodedPdfName = PdfName.decodeName(xobjName.toString());

    int prefixPos = decodedPdfName.lastIndexOf(XOBJ_NAME_PREFIX);
    if (prefixPos == -1) {
        return 0;
    }

    String numStr = decodedPdfName.substring(prefixPos + XOBJ_NAME_PREFIX.length());
    try {
        return Integer.parseInt(numStr);
    } catch (NumberFormatException ignored) {
        // names such as "Fmt1" or "Fm" are not in the generated format and cannot clash with it
        return 0;
    }
}
/**
 * Applies a fill colour to the canvas, choosing the colour space from the number of
 * components: one selects gray, three RGB and four CMYK. Any other count leaves the canvas untouched.
 *
 * @param canvas        canvas whose fill colour is set
 * @param fillColorArgs colour components, each expected to be a {@link PdfNumber}
 */
private void setFillColor(PdfContentByte canvas, List fillColorArgs) {
    final int componentCount = fillColorArgs.size();

    if (componentCount == 1) {
        float gray = ((PdfNumber) fillColorArgs.get(0)).floatValue();
        canvas.setGrayFill(gray);
    } else if (componentCount == 3) {
        float red = ((PdfNumber) fillColorArgs.get(0)).floatValue();
        float green = ((PdfNumber) fillColorArgs.get(1)).floatValue();
        float blue = ((PdfNumber) fillColorArgs.get(2)).floatValue();
        canvas.setRGBColorFillF(red, green, blue);
    } else if (componentCount == 4) {
        float cyan = ((PdfNumber) fillColorArgs.get(0)).floatValue();
        float magenta = ((PdfNumber) fillColorArgs.get(1)).floatValue();
        float yellow = ((PdfNumber) fillColorArgs.get(2)).floatValue();
        float black = ((PdfNumber) fillColorArgs.get(3)).floatValue();
        canvas.setCMYKColorFillF(cyan, magenta, yellow, black);
    }
}
/**
 * Applies a stroke colour to the canvas, choosing the colour space from the number of
 * components: one selects gray, three RGB and four CMYK. Any other count leaves the canvas untouched.
 *
 * @param canvas          canvas whose stroke colour is set
 * @param strokeColorArgs colour components, each expected to be a {@link PdfNumber}
 */
private void setStrokeColor(PdfContentByte canvas, List strokeColorArgs) {
    switch (strokeColorArgs.size()) {
        case 1:
            canvas.setGrayStroke(((PdfNumber) strokeColorArgs.get(0)).floatValue());
            break;

        case 3:
            canvas.setRGBColorStrokeF(((PdfNumber) strokeColorArgs.get(0)).floatValue(),
                    ((PdfNumber) strokeColorArgs.get(1)).floatValue(),
                    ((PdfNumber) strokeColorArgs.get(2)).floatValue());
            break;

        case 4:
            // CMYK must set the stroke colour like the other branches, not the fill colour
            canvas.setCMYKColorStrokeF(((PdfNumber) strokeColorArgs.get(0)).floatValue(),
                    ((PdfNumber) strokeColorArgs.get(1)).floatValue(),
                    ((PdfNumber) strokeColorArgs.get(2)).floatValue(),
                    ((PdfNumber) strokeColorArgs.get(3)).floatValue());
            break;
    }
}
}