All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.itextpdf.text.pdf.ocg.OCGParser Maven / Gradle / Ivy

The newest version!
/*
 *
 * This file is part of the iText (R) project.
 * Copyright (c) 1998-2022 iText Group NV
 * Authors: Bruno Lowagie, Paulo Soares, et al.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License version 3
 * as published by the Free Software Foundation with the addition of the
 * following permission added to Section 15 as permitted in Section 7(a):
 * FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
 * ITEXT GROUP. ITEXT GROUP DISCLAIMS THE WARRANTY OF NON INFRINGEMENT
 * OF THIRD PARTY RIGHTS
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE.
 * See the GNU Affero General Public License for more details.
 * You should have received a copy of the GNU Affero General Public License
 * along with this program; if not, see http://www.gnu.org/licenses or write to
 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 * Boston, MA, 02110-1301 USA, or download the license from the following URL:
 * http://itextpdf.com/terms-of-use/
 *
 * The interactive user interfaces in modified source and object code versions
 * of this program must display Appropriate Legal Notices, as required under
 * Section 5 of the GNU Affero General Public License.
 *
 * In accordance with Section 7(b) of the GNU Affero General Public License,
 * a covered work must retain the producer line in every PDF that is created
 * or manipulated using iText.
 *
 * You can be released from the requirements of the license by purchasing
 * a commercial license. Buying such a license is mandatory as soon as you
 * develop commercial activities involving the iText software without
 * disclosing the source code of your own applications.
 * These activities include: offering paid services to customers as an ASP,
 * serving PDFs on the fly in a web application, shipping iText with a closed
 * source product.
 *
 * For more information, please contact iText Software Corp. at this
 * address: sales@itextpdf.com
 */
package com.itextpdf.text.pdf.ocg;

import com.itextpdf.text.ExceptionConverter;
import com.itextpdf.text.pdf.PRStream;
import com.itextpdf.text.pdf.PRTokeniser;
import com.itextpdf.text.pdf.PdfContentParser;
import com.itextpdf.text.pdf.PdfDictionary;
import com.itextpdf.text.pdf.PdfLiteral;
import com.itextpdf.text.pdf.PdfName;
import com.itextpdf.text.pdf.PdfObject;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.PdfString;
import com.itextpdf.text.pdf.RandomAccessFileOrArray;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * A helper class for OCGRemover.
 * <p>
 * The parser rewrites a content stream operator by operator. Operators that
 * occur inside marked content belonging to one of the OCGs to remove are
 * dropped, as are <code>Do</code> invocations of XObjects that belong to
 * such an OCG; everything else is copied to the output.
 */
public class OCGParser {

    /** Constant used for the default operator. */
    public static final String DEFAULTOPERATOR = "DefaultOperator";

    /** A map with all supported operators (PDF syntax), keyed by operator name. */
    protected static final Map<String, PdfOperator> operators;

    static {
        operators = new HashMap<String, PdfOperator>();
        populateOperators();
    }

    /** The OutputStream of this worker object. */
    protected ByteArrayOutputStream baos;

    /** Keeps track of BMC/EMC balance; a value greater than zero means content is being removed. */
    protected int mc_balance = 0;

    /** The names of XObjects that shouldn't be shown. */
    protected Set<PdfName> xobj;

    /** The names of the OCGs that need to be removed. */
    protected Set<String> ocgs;

    /** The OCG properties (the /Properties entry of the current resources dictionary). */
    protected PdfDictionary properties = null;

    /**
     * Creates an instance of the OCGParser.
     * @param ocgs	a set of String values with the names of the OCGs that need to be removed.
     */
    public OCGParser(Set<String> ocgs) {
        this.ocgs = ocgs;
    }

    /**
     * Parses a stream object and removes OCGs.
     * <p>
     * XObjects whose /OC dictionary names one of the OCGs to remove are deleted
     * from the /XObject resources, and the content of the stream is replaced by
     * the filtered content.
     * @param stream	a stream object
     * @param resources	the resources dictionary of that object (containing info about the OCGs);
     *                  when <code>null</code>, the stream is processed without properties or XObjects
     * @throws IOException declared for backward compatibility; failures while
     *                  parsing are rethrown wrapped in an {@link ExceptionConverter}
     */
    public void parse(PRStream stream, PdfDictionary resources) throws IOException {
        baos = new ByteArrayOutputStream();
        xobj = new HashSet<PdfName>();
        properties = resources == null ? null : resources.getAsDict(PdfName.PROPERTIES);
        if (resources != null) {
            removeHiddenXObjects(resources.getAsDict(PdfName.XOBJECT));
        }
        try {
            // parse the content stream
            byte[] contentBytes = PdfReader.getStreamBytes(stream);
            PRTokeniser tokeniser = new PRTokeniser(new RandomAccessFileOrArray(contentBytes));
            PdfContentParser ps = new PdfContentParser(tokeniser);
            ArrayList<PdfObject> operands = new ArrayList<PdfObject>();
            while (ps.parse(operands).size() > 0) {
                PdfLiteral operator = (PdfLiteral) operands.get(operands.size() - 1);
                processOperator(this, operator, operands);
                if ("BI".equals(operator.toString())) {
                    // The BI operator itself is dropped while content is being removed,
                    // so the raw image bytes that follow it must be dropped as well.
                    copyInlineImage(tokeniser, !isToRemoved());
                }
            }
        }
        catch (Exception e) {
            throw new ExceptionConverter(e);
        }
        // No flush/close needed: both are no-ops on a ByteArrayOutputStream.
        stream.setData(baos.toByteArray());
    }

    /**
     * Collects the names of the XObjects (form or image) that belong to an OCG
     * that needs to be removed into {@link #xobj} and removes them from the
     * given dictionary.
     * @param xobjects	the /XObject resources dictionary, may be <code>null</code>
     */
    private void removeHiddenXObjects(PdfDictionary xobjects) {
        if (xobjects == null) {
            return;
        }
        for (PdfName name : xobjects.getKeys()) {
            // Entries that are not streams are skipped instead of causing a NullPointerException.
            PdfDictionary xobject = xobjects.getAsStream(name);
            if (xobject == null) {
                continue;
            }
            PdfDictionary oc = xobject.getAsDict(PdfName.OC);
            if (oc == null) {
                continue;
            }
            PdfString ocname = oc.getAsString(PdfName.NAME);
            if (ocname != null && ocgs.contains(ocname.toString())) {
                xobj.add(name);
            }
        }
        // Removal happens in a second pass so the key set isn't modified while iterating.
        for (PdfName name : xobj) {
            xobjects.remove(name);
        }
    }

    /**
     * Consumes the raw bytes following a BI operator up to and including the
     * whitespace that follows the terminating EI, optionally copying them to
     * the output.
     * @param tokeniser	the tokeniser positioned right after the BI operator
     * @param write	true if the bytes have to be copied to the output, false if they are discarded
     * @throws IOException if reading from the tokeniser fails
     */
    private void copyInlineImage(PRTokeniser tokeniser, boolean write) throws IOException {
        // State of the "<whitespace> E I <whitespace>" end-of-image detection:
        // 0 = nothing matched, 1 = whitespace, 2 = whitespace + 'E', 3 = whitespace + "EI".
        int found = 0;
        boolean immediateAfterBI = true;
        int ch;
        while ((ch = tokeniser.read()) != -1) {
            boolean whitespace = PRTokeniser.isWhitespace(ch);
            // A whitespace character directly after BI is skipped: the operator
            // was already written followed by a newline.
            if (write && (!immediateAfterBI || !whitespace)) {
                baos.write(ch);
            }
            immediateAfterBI = false;
            if (found == 0 && whitespace) {
                found++;
            } else if (found == 1 && ch == 'E') {
                found++;
            } else if (found == 1 && whitespace) {
                // A whitespace character that is part of the image data can be followed
                // by the whitespace that precedes the EI operator. The current whitespace
                // is then treated as the first potential character of the end-of-image
                // check, so 'found' is deliberately left unchanged.
            } else if (found == 2 && ch == 'I') {
                found++;
            } else if (found == 3 && whitespace) {
                break;
            } else {
                found = 0;
            }
        }
    }

    /**
     * Processes an operator by dispatching it to the handler registered for
     * its name, or to the default handler when no handler is registered.
     * @param parser	the parser that needs to process the operator
     * @param operator	the operator
     * @param operands	its operands
     * @throws Exception
     */
    protected static void processOperator(OCGParser parser, PdfLiteral operator, List<PdfObject> operands) throws Exception {
        PdfOperator op = operators.get(operator.toString());
        if (op == null) {
            op = operators.get(DEFAULTOPERATOR);
        }
        op.process(parser, operator, operands);
    }

    /**
     * Populates the operators variable.
     */
    protected static void populateOperators() {
        operators.put(DEFAULTOPERATOR, new CopyContentOperator());
        register(new PathConstructionOrPaintingOperator(),
                "m", "l", "c", "v", "y", "h", "re",
                "S", "s", "f", "F", "f*", "B", "B*", "b", "b*", "n",
                "W", "W*");
        register(new GraphicsOperator(),
                "q", "Q", "w", "J", "j", "M", "d", "ri", "i", "gs", "cm",
                "g", "G", "rg", "RG", "k", "K",
                "cs", "CS", "sc", "SC", "scn", "SCN", "sh");
        register(new XObjectOperator(), "Do");
        // ID is an inline image operator; it used to be registered with the text
        // handler, which processes operators in exactly the same way.
        register(new InlineImageOperator(), "BI", "ID", "EI");
        register(new TextOperator(),
                "BT", "ET", "Tc", "Tw", "Tz", "TL", "Tf", "Tr", "Ts",
                "Td", "TD", "Tm", "T*", "Tj", "'", "\"", "TJ");
        register(new MarkedContentOperator(), "BMC", "BDC", "EMC");
    }

    /**
     * Registers one handler for a series of operator names.
     * @param handler	the handler
     * @param names	the operator names (PDF syntax) processed by the handler
     */
    private static void register(PdfOperator handler, String... names) {
        for (String name : names) {
            operators.put(name, handler);
        }
    }

    /**
     * Checks operands to find out if the corresponding operator needs to be present or not.
     * @param operands	a list of operands
     * @return	true if the operators needs to be present.
     */
    protected boolean isVisible(List<PdfObject> operands) {
        // Hidden when there is at least one operand besides the operator and the
        // first operand is the name of an XObject that was removed.
        return !(operands.size() > 1 && xobj.contains(operands.get(0)));
    }

    /**
     * Checks if the parser is currently parsing content that needs to be ignored.
     * @return	true if the content needs to be ignored
     */
    protected boolean isToRemoved() {
        return mc_balance > 0;
    }

    /**
     * Keeps track of the MarkedContent state.
     * <p>
     * When content is already being removed, every nested marked content
     * sequence increases the balance. Otherwise the balance is only increased
     * if the reference resolves to a dictionary in the properties whose /Name
     * is one of the OCGs to remove.
     * @param ocref	a reference to an OCG dictionary, may be <code>null</code>
     */
    protected void checkMarkedContentStart(PdfName ocref) {
        if (mc_balance > 0) {
            mc_balance++;
            return;
        }
        if (properties == null || ocref == null) {
            return;
        }
        PdfDictionary ocdict = properties.getAsDict(ocref);
        if (ocdict == null) {
            return;
        }
        PdfString ocname = ocdict.getAsString(PdfName.NAME);
        if (ocname != null && ocgs.contains(ocname.toString())) {
            mc_balance++;
        }
    }

    /**
     * Keeps track of the MarkedContent state.
     */
    protected void checkMarkedContentEnd() {
        if (mc_balance > 0) {
            mc_balance--;
        }
    }

    /**
     * Processes an operator: writes its operands followed by the operator
     * itself to the output, unless the operator is removable and the parser
     * is currently removing content.
     * @param operator	the operator
     * @param operands	its operands (the operator is removed from this list)
     * @param removable	is the operator eligible for removal?
     * @throws IOException
     */
    protected void process(PdfLiteral operator, List<PdfObject> operands, boolean removable) throws IOException {
        if (removable && isToRemoved()) {
            return;
        }
        operands.remove(operator);
        for (PdfObject o : operands) {
            printsp(o);
        }
        println(operator);
    }

    /**
     * Writes a PDF object to the OutputStream, followed by a space character.
     * @param o	the object to write
     * @throws IOException
     */
    protected void printsp(PdfObject o) throws IOException {
        o.toPdf(null, baos);
        baos.write(' ');
    }

    /**
     * Writes a PDF object to the OutputStream, followed by a newline character.
     * @param o	the object to write
     * @throws IOException
     */
    protected void println(PdfObject o) throws IOException {
        o.toPdf(null, baos);
        baos.write('\n');
    }

    /**
     * PDF Operator interface.
     */
    public interface PdfOperator {
        /**
         * Methods that processes an operator
         * @param parser	the parser
         * @param operator	the operator
         * @param operands	its operands
         * @throws IOException
         */
        public void process(OCGParser parser, PdfLiteral operator, List<PdfObject> operands) throws IOException;
    }

    /**
     * Class that knows how to process path construction, path painting and path clipping operators.
     */
    private static class PathConstructionOrPaintingOperator implements PdfOperator {

        /**
         * @see com.itextpdf.text.pdf.ocg.OCGParser.PdfOperator#process(com.itextpdf.text.pdf.ocg.OCGParser, com.itextpdf.text.pdf.PdfLiteral, java.util.List)
         */
        public void process(OCGParser parser, PdfLiteral operator,
                List<PdfObject> operands) throws IOException {
            parser.process(operator, operands, true);
        }
    }

    /**
     * Class that knows how to process graphics state operators.
     * These operators are never removed.
     */
    private static class GraphicsOperator implements PdfOperator {

        /**
         * @see com.itextpdf.text.pdf.ocg.OCGParser.PdfOperator#process(com.itextpdf.text.pdf.ocg.OCGParser, com.itextpdf.text.pdf.PdfLiteral, java.util.List)
         */
        public void process(OCGParser parser, PdfLiteral operator,
                List<PdfObject> operands) throws IOException {
            parser.process(operator, operands, false);
        }
    }

    /**
     * Class that knows how to process XObject operators.
     */
    private static class XObjectOperator implements PdfOperator {

        /**
         * @see com.itextpdf.text.pdf.ocg.OCGParser.PdfOperator#process(com.itextpdf.text.pdf.ocg.OCGParser, com.itextpdf.text.pdf.PdfLiteral, java.util.List)
         */
        public void process(OCGParser parser, PdfLiteral operator,
                List<PdfObject> operands) throws IOException {
            if (parser.isVisible(operands)) {
                parser.process(operator, operands, true);
            }
        }
    }

    /**
     * Class that knows how to process inline image operators.
     */
    private static class InlineImageOperator implements PdfOperator {

        /**
         * @see com.itextpdf.text.pdf.ocg.OCGParser.PdfOperator#process(com.itextpdf.text.pdf.ocg.OCGParser, com.itextpdf.text.pdf.PdfLiteral, java.util.List)
         */
        public void process(OCGParser parser, PdfLiteral operator,
                List<PdfObject> operands) throws IOException {
            parser.process(operator, operands, true);
        }
    }

    /**
     * Class that knows how to process text state operators.
     */
    private static class TextOperator implements PdfOperator {

        /**
         * @see com.itextpdf.text.pdf.ocg.OCGParser.PdfOperator#process(com.itextpdf.text.pdf.ocg.OCGParser, com.itextpdf.text.pdf.PdfLiteral, java.util.List)
         */
        public void process(OCGParser parser, PdfLiteral operator,
                List<PdfObject> operands) throws IOException {
            parser.process(operator, operands, true);
        }
    }

    /**
     * Class that knows how to process marked content operators.
     */
    private static class MarkedContentOperator implements PdfOperator {

        /**
         * @see com.itextpdf.text.pdf.ocg.OCGParser.PdfOperator#process(com.itextpdf.text.pdf.ocg.OCGParser, com.itextpdf.text.pdf.PdfLiteral, java.util.List)
         */
        public void process(OCGParser parser, PdfLiteral operator,
                List<PdfObject> operands) throws IOException {
            String name = operator.toString();
            if ("BDC".equals(name)) {
                PdfName ocref = null;
                // Only "/OC /SomeName BDC" refers to an OCG in the properties. The second
                // element can also be the operator itself or an inline dictionary, so its
                // type is checked before casting.
                if (operands.size() > 1 && PdfName.OC.equals(operands.get(0))
                        && operands.get(1) instanceof PdfName) {
                    ocref = (PdfName) operands.get(1);
                }
                parser.checkMarkedContentStart(ocref);
            } else if ("BMC".equals(name)) {
                parser.checkMarkedContentStart(null);
            }
            // The start is registered before and the end after processing, so that
            // both the opening and the closing operator of a removed sequence are dropped.
            parser.process(operator, operands, true);
            if ("EMC".equals(name)) {
                parser.checkMarkedContentEnd();
            }
        }
    }

    /**
     * Class that processes unknown content.
     */
    private static class CopyContentOperator implements PdfOperator {

        /**
         * @see com.itextpdf.text.pdf.ocg.OCGParser.PdfOperator#process(com.itextpdf.text.pdf.ocg.OCGParser, com.itextpdf.text.pdf.PdfLiteral, java.util.List)
         */
        public void process(OCGParser parser,
                PdfLiteral operator, List<PdfObject> operands) throws IOException {
            parser.process(operator, operands, true);
        }
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy