com.itextpdf.text.pdf.pdfcleanup.PdfCleanUpContentOperator Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of itext-xtra Show documentation
Show all versions of itext-xtra Show documentation
iText Xtra, part of iText a Free Java-PDF library
The newest version!
/*
*
* This file is part of the iText (R) project.
Copyright (c) 1998-2022 iText Group NV
* Authors: Bruno Lowagie, Paulo Soares, et al.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License version 3
* as published by the Free Software Foundation with the addition of the
* following permission added to Section 15 as permitted in Section 7(a):
* FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
* ITEXT GROUP. ITEXT GROUP DISCLAIMS THE WARRANTY OF NON INFRINGEMENT
* OF THIRD PARTY RIGHTS
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU Affero General Public License for more details.
* You should have received a copy of the GNU Affero General Public License
* along with this program; if not, see http://www.gnu.org/licenses or write to
* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA, 02110-1301 USA, or download the license from the following URL:
* http://itextpdf.com/terms-of-use/
*
* The interactive user interfaces in modified source and object code versions
* of this program must display Appropriate Legal Notices, as required under
* Section 5 of the GNU Affero General Public License.
*
* In accordance with Section 7(b) of the GNU Affero General Public License,
* a covered work must retain the producer line in every PDF that is created
* or manipulated using iText.
*
* You can be released from the requirements of the license by purchasing
* a commercial license. Buying such a license is mandatory as soon as you
* develop commercial activities involving the iText software without
* disclosing the source code of your own applications.
* These activities include: offering paid services to customers as an ASP,
* serving PDFs on the fly in a web application, shipping iText with a closed
* source product.
*
* For more information, please contact iText Software Corp. at this
* address: [email protected]
*/
package com.itextpdf.text.pdf.pdfcleanup;
import com.itextpdf.awt.geom.Point2D;
import com.itextpdf.text.BadElementException;
import com.itextpdf.text.DocWriter;
import com.itextpdf.text.Image;
import com.itextpdf.text.pdf.*;
import com.itextpdf.text.pdf.parser.*;
import java.io.IOException;
import java.io.OutputStream;
import java.util.*;
class PdfCleanUpContentOperator implements ContentOperator {
private static final byte[] TStar = DocWriter.getISOBytes("T*\n");
private static final byte[] Tw = DocWriter.getISOBytes(" Tw ");
private static final byte[] TcTStar = DocWriter.getISOBytes(" Tc T*\n");
private static final byte[] TJ = DocWriter.getISOBytes("] TJ\n");
private static final byte[] Tc = DocWriter.getISOBytes(" Tc\n");
private static final byte[] m = DocWriter.getISOBytes(" m\n");
private static final byte[] l = DocWriter.getISOBytes(" l\n");
private static final byte[] c = DocWriter.getISOBytes(" c\n");
private static final byte[] h = DocWriter.getISOBytes("h\n");
private static final byte[] S = DocWriter.getISOBytes("S\n");
private static final byte[] f = DocWriter.getISOBytes("f\n");
private static final byte[] eoF = DocWriter.getISOBytes("f*\n");
private static final byte[] n = DocWriter.getISOBytes("n\n");
private static final byte[] W = DocWriter.getISOBytes("W\n");
private static final byte[] eoW = DocWriter.getISOBytes("W*\n");
private static final byte[] q = DocWriter.getISOBytes("q\n");
private static final byte[] Q = DocWriter.getISOBytes("Q\n");
private static final byte[] cs = DocWriter.getISOBytes("cs\n");
private static final Set textShowingOperators = new HashSet(Arrays.asList("TJ", "Tj", "'", "\""));
private static final Set pathConstructionOperators = new HashSet(Arrays.asList("m", "l", "c", "v", "y", "h", "re"));
private static final Set strokeOperators = new HashSet(Arrays.asList("S", "s", "B", "B*", "b", "b*"));
private static final Set nwFillOperators = new HashSet(Arrays.asList("f", "F", "B", "b"));
private static final Set eoFillOperators = new HashSet(Arrays.asList("f*", "B*", "b*"));
private static final Set pathPaintingOperators = new HashSet() {{
addAll(strokeOperators);
addAll(nwFillOperators);
addAll(eoFillOperators);
add("n");
}};
private static final Set clippingPathOperators = new HashSet(Arrays.asList("W", "W*"));
private static final Set lineStyleOperators = new HashSet(Arrays.asList("w", "J", "j", "M", "d"));
private static final Set strokeColorOperators = new HashSet(Arrays.asList("CS", "SC", "SCN", "G", "RG", "K"));
protected PdfCleanUpRenderListener cleanUpStrategy;
protected ContentOperator originalContentOperator;
public PdfCleanUpContentOperator(PdfCleanUpRenderListener cleanUpStrategy) {
this.cleanUpStrategy = cleanUpStrategy;
}
public static void populateOperators(PdfContentStreamProcessor contentProcessor,
PdfCleanUpRenderListener pdfCleanUpRenderListener) {
for (String operator : contentProcessor.getRegisteredOperatorStrings()) {
PdfCleanUpContentOperator contentOperator = new PdfCleanUpContentOperator(pdfCleanUpRenderListener);
contentOperator.originalContentOperator = contentProcessor.registerContentOperator(operator, contentOperator);
}
}
public void invoke(PdfContentStreamProcessor pdfContentStreamProcessor, PdfLiteral operator, ArrayList operands) throws Exception {
String operatorStr = operator.toString();
PdfContentByte canvas = cleanUpStrategy.getContext().getCanvas();
PRStream xFormStream = null;
boolean disableOutput = pathConstructionOperators.contains(operatorStr) || pathPaintingOperators.contains(operatorStr) || clippingPathOperators.contains(operatorStr);
GraphicsState gs = pdfContentStreamProcessor.gs();
// key - number of a string in the TJ operator, value - number following the string; the first number without string (if it's presented) is stored under 0.
// BE AWARE: zero-length strings are ignored!!!
Map structuredTJoperands = null;
if ("Do".equals(operatorStr)) {
if (operands.size() == 2 && operands.get(0).isName()) {
PdfDictionary xObjResources = cleanUpStrategy.getContext().getResources().getAsDict(PdfName.XOBJECT);
if (xObjResources != null) {
PdfStream xObj = xObjResources.getAsStream((PdfName) operands.get(0));
if (xObj instanceof PRStream && xObj.getAsName(PdfName.SUBTYPE) != null &&
xObj.getAsName(PdfName.SUBTYPE).compareTo(PdfName.FORM) == 0) {
xFormStream = (PRStream) xObj;
cleanUpStrategy.registerNewContext(xObj.getAsDict(PdfName.RESOURCES), null);
}
}
}
}
originalContentOperator.invoke(pdfContentStreamProcessor, operator, operands);
List chunks = cleanUpStrategy.getChunks();
if (xFormStream != null) {
xFormStream.setData(cleanUpStrategy.getContext().getCanvas().toPdf(cleanUpStrategy.getContext().getCanvas().getPdfWriter()));
cleanUpStrategy.popContext();
canvas = cleanUpStrategy.getContext().getCanvas();
}
if ("Do".equals(operatorStr)) {
if (chunks.size() > 0 && chunks.get(0) instanceof PdfCleanUpContentChunk.Image) {
PdfCleanUpContentChunk.Image chunk = (PdfCleanUpContentChunk.Image) chunks.get(0);
if (chunk.isVisible()) {
PdfDictionary xObjResources = cleanUpStrategy.getContext().getResources().getAsDict(PdfName.XOBJECT);
PRStream imageStream = (PRStream) xObjResources.getAsStream((PdfName) operands.get(0));
updateImageStream(imageStream, chunk.getNewImageData());
} else {
disableOutput = true;
}
}
} else if (textShowingOperators.contains(operatorStr) && !allChunksAreVisible(cleanUpStrategy.getChunks())) {
disableOutput = true;
if ("'".equals(operatorStr)) {
canvas.getInternalBuffer().append(TStar);
} else if ("\"".equals(operatorStr)) {
operands.get(0).toPdf(canvas.getPdfWriter(), canvas.getInternalBuffer());
canvas.getInternalBuffer().append(Tw);
operands.get(1).toPdf(canvas.getPdfWriter(), canvas.getInternalBuffer());
canvas.getInternalBuffer().append(TcTStar);
} else if ("TJ".equals(operatorStr)) {
structuredTJoperands = structureTJarray((PdfArray) operands.get(0));
}
writeTextChunks(structuredTJoperands, chunks, canvas, gs.getCharacterSpacing(), gs.getWordSpacing(),
gs.getFontSize(), gs.getHorizontalScaling());
} else if (pathPaintingOperators.contains(operatorStr)) {
writePath(operatorStr, canvas, gs.getColorSpaceStroke());
} else if (strokeColorOperators.contains(operatorStr)) {
// Replace current color with the new one.
cleanUpStrategy.getContext().popStrokeColor();
cleanUpStrategy.getContext().pushStrokeColor(operands);
} else if ("q".equals(operatorStr)) {
cleanUpStrategy.getContext().pushStrokeColor(cleanUpStrategy.getContext().peekStrokeColor());
} else if ("Q".equals(operatorStr)) {
cleanUpStrategy.getContext().popStrokeColor();
}
if (!disableOutput) {
writeOperands(canvas, operands);
}
cleanUpStrategy.clearChunks();
}
private void writeOperands(PdfContentByte canvas, List operands) throws IOException {
int index = 0;
for (PdfObject o : operands) {
toPdf(o, canvas.getPdfWriter(), canvas.getInternalBuffer());
canvas.getInternalBuffer().append(operands.size() > ++index ? (byte) ' ' : (byte) '\n');
}
}
private boolean allChunksAreVisible(List chunks) {
for (PdfCleanUpContentChunk chunk : chunks) {
if (!chunk.isVisible()) {
return false;
}
}
return true;
}
/**
* Overriding standard PdfObject.toPdf because we need sorted PdfDictionaries.
*/
private static void toPdf(PdfObject object, PdfWriter writer, OutputStream os) throws IOException {
if (object instanceof PdfDictionary) {
os.write('<');
os.write('<');
List keys = new ArrayList(((PdfDictionary) object).getKeys());
Collections.sort(keys);
for (PdfName key : keys) {
toPdf(key, writer, os);
PdfObject value = ((PdfDictionary) object).get(key);
int type = value.type();
if (type != PdfObject.ARRAY && type != PdfObject.DICTIONARY && type != PdfObject.NAME &&
type != PdfObject.STRING) {
os.write(' ');
}
toPdf(value, writer, os);
}
os.write('>');
os.write('>');
} else {
object.toPdf(writer, os);
}
}
/**
* Example.
* TJ = [(h) 3 4 (q) 7 (w) (e)]
* Result = {0:0, 1:7, 2:7, 3:0, 4:0}
*
* @return Map whose key is an ordinal number of the string in the TJ array and value
* is the position adjustment.
*/
private Map structureTJarray(PdfArray array) {
Map structuredTJoperands = new HashMap();
if (array.size() == 0) {
return structuredTJoperands;
}
Integer previousStrNum = 0;
structuredTJoperands.put(previousStrNum, 0f);
for (int i = 0; i < array.size(); ++i) {
PdfObject currentObj = array.getPdfObject(i);
if (currentObj instanceof PdfString && ((PdfString) currentObj).toUnicodeString().length() > 0) {
++previousStrNum;
structuredTJoperands.put(previousStrNum, 0f);
} else {
Float oldOffset = structuredTJoperands.get(previousStrNum);
structuredTJoperands.put(previousStrNum, oldOffset + ((PdfNumber) currentObj).floatValue());
}
}
return structuredTJoperands;
}
/**
* Renders parts of text which are visible.
*/
private void writeTextChunks(Map structuredTJoperands, List chunks, PdfContentByte canvas,
float characterSpacing, float wordSpacing, float fontSize, float horizontalScaling) throws IOException {
canvas.setCharacterSpacing(0);
canvas.setWordSpacing(0);
canvas.getInternalBuffer().append((byte) '[');
float convertedCharacterSpacing = -characterSpacing * 1000f / fontSize;
float convertedWordSpacing = -wordSpacing * 1000f / fontSize;
float shift = structuredTJoperands != null ? structuredTJoperands.get(0) : 0;
PdfCleanUpContentChunk.Text prevChunk = null;
for (PdfCleanUpContentChunk chunk : chunks) {
PdfCleanUpContentChunk.Text textChunk = (PdfCleanUpContentChunk.Text) chunk;
if (prevChunk != null && prevChunk.getNumOfStrTextBelongsTo() != textChunk.getNumOfStrTextBelongsTo() &&
structuredTJoperands != null) {
shift += structuredTJoperands.get(prevChunk.getNumOfStrTextBelongsTo());
}
if (textChunk.isVisible()) {
if (Float.compare(shift, 0.0f) != 0 && Float.compare(shift, -0.0f) != 0) {
canvas.getInternalBuffer().append(shift).append(' ');
}
textChunk.getText().toPdf(canvas.getPdfWriter(), canvas.getInternalBuffer());
canvas.getInternalBuffer().append(' ');
shift = convertedCharacterSpacing + (isSpace(textChunk) ? convertedWordSpacing : 0);
} else {
shift += getUnscaledTextChunkWidth(textChunk, characterSpacing, wordSpacing,
fontSize, horizontalScaling);
}
prevChunk = textChunk;
}
if (Float.compare(shift, 0.0f) != 0 && Float.compare(shift, -0.0f) != 0) {
canvas.getInternalBuffer().append(shift);
}
canvas.getInternalBuffer().append(TJ);
if (Float.compare(characterSpacing, 0.0f) != 0 && Float.compare(characterSpacing, -0.0f) != 0) {
new PdfNumber(characterSpacing).toPdf(canvas.getPdfWriter(), canvas.getInternalBuffer());
canvas.getInternalBuffer().append(Tc);
}
if (Float.compare(wordSpacing, 0.0f) != 0 && Float.compare(wordSpacing, -0.0f) != 0) {
new PdfNumber(wordSpacing).toPdf(canvas.getPdfWriter(), canvas.getInternalBuffer());
canvas.getInternalBuffer().append(Tw);
}
}
/**
* We get into this method when the current chunk is not visible.
* Here we are calculating a piece of the Tj coefficient for a previous visible chunk.
* For details see PDF spec., Text Space Details, formula for "tx" coefficient
* and TextRenderInfo class (getUnscaledBaseline)
*/
private float getUnscaledTextChunkWidth(PdfCleanUpContentChunk.Text chunk, float characterSpacing,
float wordSpacing, float fontSize, float horizontalScaling) {
// Horizontal scaling is stored as the value in [0, 1] interval, so we don't need to divide it on 100;
// also we need to add character and word spaces because TextRenderInfo class truncates them from the end of the string
// (single character string in our case is also truncated)
float scaledChunkWidth = (chunk.getEndX() - chunk.getStartX()) +
(characterSpacing + (isSpace(chunk) ? wordSpacing : 0)) * horizontalScaling;
return -scaledChunkWidth * 1000f / (horizontalScaling * fontSize);
}
private boolean isSpace(PdfCleanUpContentChunk.Text chunk) {
return chunk.getText().toUnicodeString().equals(" ");
}
private void updateImageStream(PRStream imageStream, byte[] newData) throws BadElementException, IOException, BadPdfFormatException {
PdfImage image = new PdfImage(Image.getInstance(newData), "", null);
if (imageStream.contains(PdfName.SMASK)) {
image.put(PdfName.SMASK, imageStream.get(PdfName.SMASK));
}
if (imageStream.contains(PdfName.MASK)) {
image.put(PdfName.MASK, imageStream.get(PdfName.MASK));
}
if (imageStream.contains(PdfName.SMASKINDATA)) {
image.put(PdfName.SMASKINDATA, imageStream.get(PdfName.SMASKINDATA));
}
imageStream.clear();
imageStream.putAll(image);
imageStream.setDataRaw(image.getBytes());
}
private void writePath(String operatorStr, PdfContentByte canvas, PdfName strokeColorSpace) throws IOException {
if (nwFillOperators.contains(operatorStr)) {
writePath(cleanUpStrategy.getCurrentFillPath(), f, canvas);
} else if (eoFillOperators.contains(operatorStr)) {
writePath(cleanUpStrategy.getCurrentFillPath(), eoF, canvas);
}
if (strokeOperators.contains(operatorStr)) {
writeStroke(canvas, cleanUpStrategy.getCurrentStrokePath(), strokeColorSpace);
}
if (cleanUpStrategy.isClipped()) {
if (!cleanUpStrategy.getNewClipPath().isEmpty()) {
byte[] clippingOperator = (cleanUpStrategy.getClippingRule() == PathPaintingRenderInfo.NONZERO_WINDING_RULE) ? W : eoW;
writePath(cleanUpStrategy.getNewClipPath(), clippingOperator, canvas);
} else {
// If the clipping path from the source document is cleaned (it happens when reduction
// area covers the path completely), then you should treat it as an empty set (no points
// are included in the path). Then the current clipping path (which is the intersection
// between previous clipping path and the new one) is also empty set, which means that
// there is no visible content at all. But at the same time as we removed the clipping
// path, the invisible content would become visible. So, to emulate the correct result,
// we would simply put a degenerate clipping path which consists of a single point at (0, 0).
Path degeneratePath = new Path();
degeneratePath.moveTo(0, 0);
writePath(degeneratePath, W, canvas);
}
canvas.getInternalBuffer().append(n);
cleanUpStrategy.setClipped(false);
}
}
private void writePath(Path path, byte[] pathPaintingOperator, PdfContentByte canvas) throws IOException {
if (path.isEmpty()) {
return;
}
for (Subpath subpath : path.getSubpaths()) {
writeMoveTo(subpath.getStartPoint(), canvas);
for (Shape segment : subpath.getSegments()) {
if (segment instanceof BezierCurve) {
writeBezierCurve((BezierCurve) segment, canvas);
} else {
writeLine((Line) segment, canvas);
}
}
if (subpath.isClosed()) {
canvas.getInternalBuffer().append(h);
}
}
if (pathPaintingOperator != null) {
canvas.getInternalBuffer().append(pathPaintingOperator);
}
}
private void writeMoveTo(Point2D destinationPoint, PdfContentByte canvas) throws IOException {
new PdfNumber(destinationPoint.getX()).toPdf(canvas.getPdfWriter(), canvas.getInternalBuffer());
canvas.getInternalBuffer().append(' ');
new PdfNumber(destinationPoint.getY()).toPdf(canvas.getPdfWriter(), canvas.getInternalBuffer());
canvas.getInternalBuffer().append(m);
}
private void writeBezierCurve(BezierCurve curve, PdfContentByte canvas) throws IOException {
List basePoints = curve.getBasePoints();
Point2D p2 = basePoints.get(1);
Point2D p3 = basePoints.get(2);
Point2D p4 = basePoints.get(3);
new PdfNumber(p2.getX()).toPdf(canvas.getPdfWriter(), canvas.getInternalBuffer());
canvas.getInternalBuffer().append(' ');
new PdfNumber(p2.getY()).toPdf(canvas.getPdfWriter(), canvas.getInternalBuffer());
canvas.getInternalBuffer().append(' ');
new PdfNumber(p3.getX()).toPdf(canvas.getPdfWriter(), canvas.getInternalBuffer());
canvas.getInternalBuffer().append(' ');
new PdfNumber(p3.getY()).toPdf(canvas.getPdfWriter(), canvas.getInternalBuffer());
canvas.getInternalBuffer().append(' ');
new PdfNumber(p4.getX()).toPdf(canvas.getPdfWriter(), canvas.getInternalBuffer());
canvas.getInternalBuffer().append(' ');
new PdfNumber(p4.getY()).toPdf(canvas.getPdfWriter(), canvas.getInternalBuffer());
canvas.getInternalBuffer().append(c);
}
private void writeLine(Line line, PdfContentByte canvas) throws IOException {
Point2D destination = line.getBasePoints().get(1);
new PdfNumber(destination.getX()).toPdf(canvas.getPdfWriter(), canvas.getInternalBuffer());
canvas.getInternalBuffer().append(' ');
new PdfNumber(destination.getY()).toPdf(canvas.getPdfWriter(), canvas.getInternalBuffer());
canvas.getInternalBuffer().append(l);
}
private void writeStroke(PdfContentByte canvas, Path path, PdfName strokeColorSpace) throws IOException {
canvas.getInternalBuffer().append(q);
if (strokeColorSpace != null) {
strokeColorSpace.toPdf(canvas.getPdfWriter(), canvas.getInternalBuffer());
canvas.getInternalBuffer().append(' ').append(cs);
}
List strokeColorOperands = cleanUpStrategy.getContext().peekStrokeColor();
String strokeOperatorStr = strokeColorOperands.get(strokeColorOperands.size() - 1).toString();
// Below expression converts stroke color operator to its fill analogue.
strokeColorOperands.set(strokeColorOperands.size() - 1, new PdfLiteral(strokeOperatorStr.toLowerCase()));
writeOperands(canvas, strokeColorOperands);
writePath(path, f, canvas);
canvas.getInternalBuffer().append(Q);
}
}