com.itextpdf.text.pdf.ocg.OCGParser Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of itext-xtra Show documentation
Show all versions of itext-xtra Show documentation
iText Xtra, part of iText a Free Java-PDF library
The newest version!
/*
*
* This file is part of the iText (R) project.
* Copyright (c) 1998-2022 iText Group NV
* Authors: Bruno Lowagie, Paulo Soares, et al.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License version 3
* as published by the Free Software Foundation with the addition of the
* following permission added to Section 15 as permitted in Section 7(a):
* FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
* ITEXT GROUP. ITEXT GROUP DISCLAIMS THE WARRANTY OF NON INFRINGEMENT
* OF THIRD PARTY RIGHTS
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU Affero General Public License for more details.
* You should have received a copy of the GNU Affero General Public License
* along with this program; if not, see http://www.gnu.org/licenses or write to
* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
* Boston, MA, 02110-1301 USA, or download the license from the following URL:
* http://itextpdf.com/terms-of-use/
*
* The interactive user interfaces in modified source and object code versions
* of this program must display Appropriate Legal Notices, as required under
* Section 5 of the GNU Affero General Public License.
*
* In accordance with Section 7(b) of the GNU Affero General Public License,
* a covered work must retain the producer line in every PDF that is created
* or manipulated using iText.
*
* You can be released from the requirements of the license by purchasing
* a commercial license. Buying such a license is mandatory as soon as you
* develop commercial activities involving the iText software without
* disclosing the source code of your own applications.
* These activities include: offering paid services to customers as an ASP,
* serving PDFs on the fly in a web application, shipping iText with a closed
* source product.
*
* For more information, please contact iText Software Corp. at this
* address: sales@itextpdf.com
*/
package com.itextpdf.text.pdf.ocg;
import com.itextpdf.text.ExceptionConverter;
import com.itextpdf.text.pdf.PRStream;
import com.itextpdf.text.pdf.PRTokeniser;
import com.itextpdf.text.pdf.PdfContentParser;
import com.itextpdf.text.pdf.PdfDictionary;
import com.itextpdf.text.pdf.PdfLiteral;
import com.itextpdf.text.pdf.PdfName;
import com.itextpdf.text.pdf.PdfObject;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.PdfString;
import com.itextpdf.text.pdf.RandomAccessFileOrArray;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
* A helper class for OCGRemover.
*/
public class OCGParser {

    /** Constant used for the default operator. */
    public static final String DEFAULTOPERATOR = "DefaultOperator";

    /** A map with all supported operators (PDF syntax), keyed by operator name. */
    protected static final Map<String, PdfOperator> operators;

    static {
        operators = new HashMap<String, PdfOperator>();
        populateOperators();
    }

    /** The OutputStream of this worker object. */
    protected ByteArrayOutputStream baos;

    /** Keeps track of BMC/EMC balance. */
    protected int mc_balance = 0;

    /** The names of XObjects that shouldn't be shown. */
    protected Set<PdfName> xobj;

    /** The OCGs that need to be removed. */
    protected Set<String> ocgs;

    /** The OCG properties. */
    protected PdfDictionary properties = null;

    /**
     * Creates an instance of the OCGParser.
     * @param ocgs a set of String values with the names of the OCGs that need to be removed.
     */
    public OCGParser(Set<String> ocgs) {
        this.ocgs = ocgs;
    }

    /**
     * Parses a stream object and removes OCGs.
     * <p>
     * XObjects whose /OC dictionary names an OCG to be removed are deleted from the
     * resources and their names remembered, so that the matching Do operators can be
     * dropped. The content stream is then rewritten operator by operator and stored
     * back into <code>stream</code>.
     * @param stream a stream object
     * @param resources the resources dictionary of that object (containing info about the OCGs);
     *                  may be null, in which case no XObject or property lookup is done
     * @throws IOException declared for callers; failures while rewriting the content are
     *                     rethrown wrapped in an ExceptionConverter
     */
    public void parse(PRStream stream, PdfDictionary resources) throws IOException {
        baos = new ByteArrayOutputStream();
        xobj = new HashSet<PdfName>();
        properties = resources == null ? null : resources.getAsDict(PdfName.PROPERTIES);
        PdfDictionary xobjects = resources == null ? null : resources.getAsDict(PdfName.XOBJECT);
        if (xobjects != null) {
            // remove XObjects (form or image) that belong to an OCG that needs to be removed
            for (PdfName name : xobjects.getKeys()) {
                // Only dictionary access is needed, so no cast to PRStream is required.
                PdfDictionary xobject = xobjects.getAsStream(name);
                if (xobject == null) {
                    // entry is not a stream: nothing to inspect
                    continue;
                }
                PdfDictionary oc = xobject.getAsDict(PdfName.OC);
                if (oc == null) {
                    continue;
                }
                PdfString ocname = oc.getAsString(PdfName.NAME);
                if (ocname != null && ocgs.contains(ocname.toString())) {
                    xobj.add(name);
                }
            }
            // removal happens in a second pass so the key set is not modified while iterating
            for (PdfName name : xobj) {
                xobjects.remove(name);
            }
        }
        try {
            // parse the content stream
            byte[] contentBytes = PdfReader.getStreamBytes(stream);
            PRTokeniser tokeniser = new PRTokeniser(new RandomAccessFileOrArray(contentBytes));
            PdfContentParser ps = new PdfContentParser(tokeniser);
            ArrayList<PdfObject> operands = new ArrayList<PdfObject>();
            while (ps.parse(operands).size() > 0) {
                // the operator is always the last element of the parsed list
                PdfLiteral operator = (PdfLiteral) operands.get(operands.size() - 1);
                processOperator(this, operator, operands);
                if ("BI".equals(operator.toString())) {
                    copyInlineImage(tokeniser);
                }
            }
        }
        catch (Exception e) {
            throw new ExceptionConverter(e);
        }
        stream.setData(baos.toByteArray());
    }

    /**
     * Consumes the raw bytes of an inline image (everything after BI up to and including
     * the whitespace that follows EI). The bytes are copied to the output unless the
     * parser is currently inside content that is being removed; in that case the BI
     * operator was not written either, so the image body must be dropped as well.
     * @param tokeniser the tokeniser positioned right after the BI operator
     * @throws IOException if reading from the tokeniser fails
     */
    private void copyInlineImage(PRTokeniser tokeniser) throws IOException {
        final boolean keep = !isToRemoved();
        // progress of the "<whitespace> E I <whitespace>" terminator match:
        // 0 = nothing, 1 = whitespace seen, 2 = 'E' seen, 3 = 'I' seen
        int found = 0;
        int ch;
        boolean immediateAfterBI = true;
        while ((ch = tokeniser.read()) != -1) {
            boolean whitespace = PRTokeniser.isWhitespace(ch);
            // the first whitespace after BI is skipped: a newline was already written after BI
            if (keep && (!immediateAfterBI || !whitespace)) {
                baos.write(ch);
            }
            immediateAfterBI = false;
            if (whitespace) {
                if (found == 3) {
                    break;
                }
                // Any whitespace may be the one preceding EI, also when it interrupts a
                // partial match (e.g. " E EI "), so the match restarts at state 1.
                found = 1;
            } else if (found == 1 && ch == 'E') {
                found = 2;
            } else if (found == 2 && ch == 'I') {
                found = 3;
            } else {
                found = 0;
            }
        }
    }

    /**
     * Processes an operator by dispatching to the handler registered for it, or to the
     * default handler when the operator is unknown.
     * @param parser the parser that needs to process the operator
     * @param operator the operator
     * @param operands its operands (the operator itself is the last element)
     * @throws Exception
     */
    protected static void processOperator(OCGParser parser, PdfLiteral operator, List<PdfObject> operands) throws Exception {
        PdfOperator op = operators.get(operator.toString());
        if (op == null) {
            op = operators.get(DEFAULTOPERATOR);
        }
        op.process(parser, operator, operands);
    }

    /**
     * Populates the operators variable.
     */
    protected static void populateOperators() {
        operators.put(DEFAULTOPERATOR, new CopyContentOperator());
        register(new PathConstructionOrPaintingOperator(),
                "m", "l", "c", "v", "y", "h", "re",
                "S", "s", "f", "F", "f*", "B", "B*", "b", "b*", "n",
                "W", "W*");
        register(new GraphicsOperator(),
                "q", "Q", "w", "J", "j", "M", "d", "ri", "i", "gs", "cm",
                "g", "G", "rg", "RG", "k", "K",
                "cs", "CS", "sc", "SC", "scn", "SCN", "sh");
        register(new XObjectOperator(), "Do");
        register(new InlineImageOperator(), "BI", "EI");
        register(new TextOperator(),
                "BT", "ID", "ET",
                "Tc", "Tw", "Tz", "TL", "Tf", "Tr", "Ts",
                "Td", "TD", "Tm", "T*",
                "Tj", "'", "\"", "TJ");
        register(new MarkedContentOperator(), "BMC", "BDC", "EMC");
    }

    /**
     * Registers one handler instance for several operator names.
     * @param handler the handler
     * @param names the operator names (PDF syntax) handled by it
     */
    private static void register(PdfOperator handler, String... names) {
        for (String name : names) {
            operators.put(name, handler);
        }
    }

    /**
     * Checks operands to find out if the corresponding operator needs to be present or not.
     * @param operands a list of operands
     * @return true if the operator needs to be present.
     */
    protected boolean isVisible(List<PdfObject> operands) {
        return !(operands.size() > 1 && xobj.contains(operands.get(0)));
    }

    /**
     * Checks if the parser is currently parsing content that needs to be ignored.
     * @return true if the content needs to be ignored
     */
    protected boolean isToRemoved() {
        return mc_balance > 0;
    }

    /**
     * Keeps track of the MarkedContent state.
     * While already inside removed content every nested start is counted; otherwise the
     * counter only starts when <code>ocref</code> resolves, through the Properties
     * dictionary, to an OCG whose name is in the set of OCGs to remove.
     * @param ocref a reference to an OCG dictionary (may be null)
     */
    protected void checkMarkedContentStart(PdfName ocref) {
        if (mc_balance > 0) {
            mc_balance++;
            return;
        }
        if (properties == null || ocref == null) {
            return;
        }
        PdfDictionary ocdict = properties.getAsDict(ocref);
        if (ocdict == null) {
            return;
        }
        PdfString ocname = ocdict.getAsString(PdfName.NAME);
        if (ocname != null && ocgs.contains(ocname.toString())) {
            mc_balance++;
        }
    }

    /**
     * Keeps track of the MarkedContent state.
     */
    protected void checkMarkedContentEnd() {
        if (mc_balance > 0) {
            mc_balance--;
        }
    }

    /**
     * Processes an operator: writes its operands and the operator itself to the output,
     * unless it is removable and the parser is inside content that is being removed.
     * @param operator the operator
     * @param operands its operands
     * @param removable is the operator eligible for removal?
     * @throws IOException
     */
    protected void process(PdfLiteral operator, List<PdfObject> operands, boolean removable) throws IOException {
        if (removable && isToRemoved()) {
            return;
        }
        // the operator is part of the list; take it out so only real operands are printed first
        operands.remove(operator);
        for (PdfObject o : operands) {
            printsp(o);
        }
        println(operator);
    }

    /**
     * Writes a PDF object to the OutputStream, followed by a space character.
     * @param o the object to write
     * @throws IOException
     */
    protected void printsp(PdfObject o) throws IOException {
        o.toPdf(null, baos);
        baos.write(' ');
    }

    /**
     * Writes a PDF object to the OutputStream, followed by a newline character.
     * @param o the object to write
     * @throws IOException
     */
    protected void println(PdfObject o) throws IOException {
        o.toPdf(null, baos);
        baos.write('\n');
    }

    /**
     * PDF Operator interface.
     */
    public interface PdfOperator {
        /**
         * Method that processes an operator
         * @param parser the parser
         * @param operator the operator
         * @param operands its operands
         * @throws IOException
         */
        public void process(OCGParser parser, PdfLiteral operator, List<PdfObject> operands) throws IOException;
    }

    /**
     * Class that knows how to process path construction, path painting and path clipping operators.
     */
    private static class PathConstructionOrPaintingOperator implements PdfOperator {
        /**
         * @see com.itextpdf.text.pdf.ocg.OCGParser.PdfOperator#process(com.itextpdf.text.pdf.ocg.OCGParser, com.itextpdf.text.pdf.PdfLiteral, java.util.List)
         */
        public void process(OCGParser parser, PdfLiteral operator,
                List<PdfObject> operands) throws IOException {
            parser.process(operator, operands, true);
        }
    }

    /**
     * Class that knows how to process graphics state operators.
     * These operators are never removed (they are passed on as not removable).
     */
    private static class GraphicsOperator implements PdfOperator {
        /**
         * @see com.itextpdf.text.pdf.ocg.OCGParser.PdfOperator#process(com.itextpdf.text.pdf.ocg.OCGParser, com.itextpdf.text.pdf.PdfLiteral, java.util.List)
         */
        public void process(OCGParser parser, PdfLiteral operator,
                List<PdfObject> operands) throws IOException {
            parser.process(operator, operands, false);
        }
    }

    /**
     * Class that knows how to process XObject operators.
     */
    private static class XObjectOperator implements PdfOperator {
        /**
         * @see com.itextpdf.text.pdf.ocg.OCGParser.PdfOperator#process(com.itextpdf.text.pdf.ocg.OCGParser, com.itextpdf.text.pdf.PdfLiteral, java.util.List)
         */
        public void process(OCGParser parser, PdfLiteral operator,
                List<PdfObject> operands) throws IOException {
            // a Do for an XObject that was removed from the resources is dropped entirely
            if (parser.isVisible(operands)) {
                parser.process(operator, operands, true);
            }
        }
    }

    /**
     * Class that knows how to process inline image operators.
     */
    private static class InlineImageOperator implements PdfOperator {
        /**
         * @see com.itextpdf.text.pdf.ocg.OCGParser.PdfOperator#process(com.itextpdf.text.pdf.ocg.OCGParser, com.itextpdf.text.pdf.PdfLiteral, java.util.List)
         */
        public void process(OCGParser parser, PdfLiteral operator,
                List<PdfObject> operands) throws IOException {
            parser.process(operator, operands, true);
        }
    }

    /**
     * Class that knows how to process text state operators.
     */
    private static class TextOperator implements PdfOperator {
        /**
         * @see com.itextpdf.text.pdf.ocg.OCGParser.PdfOperator#process(com.itextpdf.text.pdf.ocg.OCGParser, com.itextpdf.text.pdf.PdfLiteral, java.util.List)
         */
        public void process(OCGParser parser, PdfLiteral operator,
                List<PdfObject> operands) throws IOException {
            parser.process(operator, operands, true);
        }
    }

    /**
     * Class that knows how to process marked content operators.
     */
    private static class MarkedContentOperator implements PdfOperator {
        /**
         * @see com.itextpdf.text.pdf.ocg.OCGParser.PdfOperator#process(com.itextpdf.text.pdf.ocg.OCGParser, com.itextpdf.text.pdf.PdfLiteral, java.util.List)
         */
        public void process(OCGParser parser, PdfLiteral operator,
                List<PdfObject> operands) throws IOException {
            String name = operator.toString();
            if ("BDC".equals(name)) {
                PdfName ocref = null;
                // operands is [tag, properties, BDC]; the properties operand is only usable
                // as a lookup key when it is a name (it may also be an inline dictionary,
                // and in a malformed "/OC BDC" element 1 is the operator itself)
                if (operands.size() > 2
                        && PdfName.OC.equals(operands.get(0))
                        && operands.get(1) instanceof PdfName) {
                    ocref = (PdfName) operands.get(1);
                }
                parser.checkMarkedContentStart(ocref);
            } else if ("BMC".equals(name)) {
                parser.checkMarkedContentStart(null);
            }
            // the start is counted before and the end after writing, so that both the
            // opening and the closing operator of a removed section are dropped
            parser.process(operator, operands, true);
            if ("EMC".equals(name)) {
                parser.checkMarkedContentEnd();
            }
        }
    }

    /**
     * Class that processes unknown content.
     */
    private static class CopyContentOperator implements PdfOperator {
        /**
         * @see com.itextpdf.text.pdf.ocg.OCGParser.PdfOperator#process(com.itextpdf.text.pdf.ocg.OCGParser, com.itextpdf.text.pdf.PdfLiteral, java.util.List)
         */
        public void process(OCGParser parser,
                PdfLiteral operator, List<PdfObject> operands) throws IOException {
            parser.process(operator, operands, true);
        }
    }
}