All downloads are free. Search and download functionality uses the official Maven repository.

com.lowagie.text.pdf.parser.PdfContentStreamHandler Maven / Gradle / Ivy

There is a newer version: 2.0.3
Show newest version
/**
 * Copyright 2014 by Tizra Inc.
 * The contents of this file are subject to the Mozilla Public License Version 1.1
 * (the "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the License.
 *
 * The Original Code is 'iText, a free JAVA-PDF library'.
 *
 * The Initial Developer of the Original Code is Bruno Lowagie. Portions created by
 * the Initial Developer are Copyright (C) 1999-2008 by Bruno Lowagie.
 * All Rights Reserved.
 * Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer
 * are Copyright (C) 2000-2008 by Paulo Soares. All Rights Reserved.
 *
 * Contributor(s): all the names of the contributors are added in the source code
 * where applicable.
 *
 * Alternatively, the contents of this file may be used under the terms of the
 * LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the
 * provisions of LGPL are applicable instead of those above.  If you wish to
 * allow use of your version of this file only under the terms of the LGPL
 * License and not to allow others to use your version of this file under
 * the MPL, indicate your decision by deleting the provisions above and
 * replace them with the notice and other provisions required by the LGPL.
 * If you do not delete the provisions above, a recipient may use your version
 * of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE.
 *
 * This library is free software; you can redistribute it and/or modify it
 * under the terms of the MPL as stated above or under the terms of the GNU
 * Library General Public License as published by the Free Software Foundation;
 * either version 2 of the License, or any later version.
 *
 * This library is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more
 * details.
 */
package com.lowagie.text.pdf.parser;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.ListIterator;
import java.util.Locale;
import java.util.Map;
import java.util.Stack;

import com.lowagie.text.ExceptionConverter;
import com.lowagie.text.error_messages.MessageLocalization;
import com.lowagie.text.pdf.CMapAwareDocumentFont;
import com.lowagie.text.pdf.PRIndirectReference;
import com.lowagie.text.pdf.PRStream;
import com.lowagie.text.pdf.PRTokeniser;
import com.lowagie.text.pdf.PdfArray;
import com.lowagie.text.pdf.PdfContentParser;
import com.lowagie.text.pdf.PdfDictionary;
import com.lowagie.text.pdf.PdfIndirectReference;
import com.lowagie.text.pdf.PdfLiteral;
import com.lowagie.text.pdf.PdfName;
import com.lowagie.text.pdf.PdfNumber;
import com.lowagie.text.pdf.PdfObject;
import com.lowagie.text.pdf.PdfReader;
import com.lowagie.text.pdf.PdfStream;
import com.lowagie.text.pdf.PdfString;

/**
 * @author dgd
 */
public class PdfContentStreamHandler {
	/**
	 * A content operator implementation (TJ): shows an array whose entries
	 * are either strings to display or numeric position adjustments.
	 */
	static class ShowTextArray implements ContentOperator {
		/**
		 * @see com.lowagie.text.pdf.parser.ContentOperator#getOperatorName()
		 */
		@Override
		public String getOperatorName() {
			return "TJ";
		}

		/**
		 * Walks the array found in the first operand in order. String entries
		 * are displayed through the handler; every other entry is cast to
		 * {@link PdfNumber} and applied as a text adjustment.
		 *
		 * @param operands
		 *            operand list; element 0 must be a {@link PdfArray}
		 * @param handler
		 *            the handler whose text state is updated
		 * @param resources
		 *            not used by this operator
		 */
		@Override
		public void invoke(ArrayList operands,
				PdfContentStreamHandler handler, PdfDictionary resources) {
			PdfArray entries = (PdfArray) operands.get(0);
			Iterator it = entries.listIterator();
			while (it.hasNext()) {
				Object entry = it.next();
				if (entry instanceof PdfString) {
					handler.displayPdfString((PdfString) entry);
					continue;
				}
				// anything that is not a string is treated as a number
				handler.applyTextAdjust(((PdfNumber) entry).floatValue());
			}
		}
	}

	/**
	 * A content operator implementation (BT): starts a text object.
	 */
	static class BeginText implements ContentOperator {
		/**
		 * @see com.lowagie.text.pdf.parser.ContentOperator#getOperatorName()
		 */
		@Override
		public String getOperatorName() {
			return "BT";
		}

		/**
		 * Assigns one newly constructed {@link Matrix} to both the text
		 * matrix and the text line matrix of the handler.
		 *
		 * @param operands
		 *            not used by this operator
		 * @param handler
		 *            the handler whose matrices are replaced
		 * @param resources
		 *            not used by this operator
		 */
		@Override
		public void invoke(ArrayList operands,
				PdfContentStreamHandler handler, PdfDictionary resources) {
			Matrix fresh = new Matrix();
			handler.textMatrix = fresh;
			handler.textLineMatrix = fresh;
		}
	}

	/**
	 * A content operator implementation (ET): ends a text object.
	 */
	static class EndText implements ContentOperator {
		/**
		 * @see com.lowagie.text.pdf.parser.ContentOperator#getOperatorName()
		 */
		@Override
		public String getOperatorName() {
			return "ET";
		}

		/**
		 * Clears both the text line matrix and the text matrix of the
		 * handler by setting them to {@code null}.
		 *
		 * @param operands
		 *            not used by this operator
		 * @param handler
		 *            the handler whose matrices are cleared
		 * @param resources
		 *            not used by this operator
		 */
		@Override
		public void invoke(ArrayList operands,
				PdfContentStreamHandler handler, PdfDictionary resources) {
			handler.textLineMatrix = null;
			handler.textMatrix = null;
		}
	}

	/**
	 * A content operator implementation (cm): concatenates a matrix onto the
	 * current transformation matrix.
	 */
	static class ModifyCurrentTransformationMatrix implements ContentOperator {
		/**
		 * @see com.lowagie.text.pdf.parser.ContentOperator#getOperatorName()
		 */
		@Override
		public String getOperatorName() {
			return "cm";
		}

		/**
		 * Reads six numeric operands, builds a {@link Matrix} from them and
		 * replaces the CTM of the graphics state on top of the stack with
		 * {@code ctm.multiply(matrix)}.
		 *
		 * @param operands
		 *            operand list; elements 0-5 must be {@link PdfNumber}s
		 * @param handler
		 *            the handler whose current graphics state is modified
		 * @param resources
		 *            not used by this operator
		 */
		@Override
		public void invoke(ArrayList operands,
				PdfContentStreamHandler handler, PdfDictionary resources) {
			float[] v = new float[6];
			for (int k = 0; k < v.length; k++) {
				v[k] = ((PdfNumber) operands.get(k)).floatValue();
			}
			Matrix delta = new Matrix(v[0], v[1], v[2], v[3], v[4], v[5]);
			GraphicsState current = handler.gs();
			current.ctm = current.ctm.multiply(delta);
		}
	}

	/**
	 * A content operator implementation ('): moves to the next line and then
	 * shows a string, by delegating to the T* and Tj operators it was given.
	 */
	static class MoveNextLineAndShowText implements ContentOperator {
		/** Delegate used for the "move to next line" step. */
		private final PdfContentStreamHandler.TextMoveNextLine textMoveNextLine;

		/** Delegate used for the "show text" step. */
		private final PdfContentStreamHandler.ShowText showText;

		/**
		 * @param textMoveNextLine
		 *            operator invoked first, with an empty operand list
		 * @param showText
		 *            operator invoked second, with the original operands
		 */
		public MoveNextLineAndShowText(
				PdfContentStreamHandler.TextMoveNextLine textMoveNextLine,
				PdfContentStreamHandler.ShowText showText) {
			this.textMoveNextLine = textMoveNextLine;
			this.showText = showText;
		}

		/**
		 * @see com.lowagie.text.pdf.parser.ContentOperator#getOperatorName()
		 */
		@Override
		public String getOperatorName() {
			return "'";
		}

		/**
		 * Runs the next-line delegate with no operands, then the show-text
		 * delegate with the operands passed to this operator.
		 *
		 * @param operands
		 *            forwarded unchanged to the show-text delegate
		 * @param handler
		 *            forwarded to both delegates
		 * @param resources
		 *            forwarded to both delegates
		 */
		@Override
		public void invoke(ArrayList operands,
				PdfContentStreamHandler handler, PdfDictionary resources) {
			ArrayList noOperands = new ArrayList(0);
			textMoveNextLine.invoke(noOperands, handler, resources);
			showText.invoke(operands, handler, resources);
		}
	}

	/**
	 * A content operator implementation ("): sets word spacing and character
	 * spacing, then moves to the next line and shows a string, by delegating
	 * to the Tw, Tc and ' operators it was given.
	 */
	static class MoveNextLineAndShowTextWithSpacing implements ContentOperator {
		/** Delegate that receives the first operand. */
		private final PdfContentStreamHandler.SetTextWordSpacing setTextWordSpacing;

		/** Delegate that receives the second operand. */
		private final PdfContentStreamHandler.SetTextCharacterSpacing setTextCharacterSpacing;

		/** Delegate that receives the third operand. */
		private final MoveNextLineAndShowText moveNextLineAndShowText;

		/**
		 * @param setTextWordSpacing
		 *            operator invoked with operand 0
		 * @param setTextCharacterSpacing
		 *            operator invoked with operand 1
		 * @param moveNextLineAndShowText
		 *            operator invoked with operand 2
		 */
		public MoveNextLineAndShowTextWithSpacing(
				PdfContentStreamHandler.SetTextWordSpacing setTextWordSpacing,
				PdfContentStreamHandler.SetTextCharacterSpacing setTextCharacterSpacing,
				MoveNextLineAndShowText moveNextLineAndShowText) {
			this.setTextWordSpacing = setTextWordSpacing;
			this.setTextCharacterSpacing = setTextCharacterSpacing;
			this.moveNextLineAndShowText = moveNextLineAndShowText;
		}

		/**
		 * @see com.lowagie.text.pdf.parser.ContentOperator#getOperatorName()
		 */
		@Override
		public String getOperatorName() {
			return "\"";
		}

		/**
		 * Casts the three operands first (so a wrongly typed operand fails
		 * before any state is touched), then feeds each one, wrapped in its
		 * own single-element list, to the matching delegate.
		 *
		 * @param operands
		 *            operand list: number, number, string
		 * @param handler
		 *            forwarded to all delegates
		 * @param resources
		 *            forwarded to all delegates
		 */
		@Override
		public void invoke(ArrayList operands,
				PdfContentStreamHandler handler, PdfDictionary resources) {
			PdfNumber wordSpacing = (PdfNumber) operands.get(0);
			PdfNumber charSpacing = (PdfNumber) operands.get(1);
			PdfString text = (PdfString) operands.get(2);

			setTextWordSpacing.invoke(single(wordSpacing), handler, resources);
			setTextCharacterSpacing.invoke(single(charSpacing), handler,
					resources);
			moveNextLineAndShowText.invoke(single(text), handler, resources);
		}

		/** Wraps one operand in a new single-element operand list. */
		private static ArrayList single(PdfObject operand) {
			ArrayList list = new ArrayList(1);
			list.add(operand);
			return list;
		}
	}

	/**
	 * A content operator implementation (Q): discards the graphics state on
	 * top of the handler's stack.
	 */
	static class PopGraphicsState implements ContentOperator {
		/**
		 * @see com.lowagie.text.pdf.parser.ContentOperator#getOperatorName()
		 */
		@Override
		public String getOperatorName() {
			return "Q";
		}

		/**
		 * Pops one entry from {@code handler.gsStack}.
		 *
		 * @param operands
		 *            not used by this operator
		 * @param handler
		 *            the handler whose graphics state stack is popped
		 * @param resources
		 *            not used by this operator
		 */
		@Override
		public void invoke(ArrayList operands,
				PdfContentStreamHandler handler, PdfDictionary resources) {
			handler.gsStack.pop();
		}
	}

	/**
	 * A content operator implementation (gs): applies a named entry of the
	 * resources' ExtGState dictionary. Only the FONT entry of that graphics
	 * state dictionary is evaluated.
	 */
	static class ProcessGraphicsStateResource implements ContentOperator {
		/**
		 * @see com.lowagie.text.pdf.parser.ContentOperator#getOperatorName()
		 */
		@Override
		public String getOperatorName() {
			return "gs";
		}

		/**
		 * Looks up the graphics state dictionary named by operand 0 and, if
		 * it carries a FONT array, installs that font and size in the
		 * handler's current graphics state.
		 *
		 * @param operands
		 *            operand list; element 0 must be a {@link PdfName}
		 * @param handler
		 *            the handler whose current graphics state is updated
		 * @param resources
		 *            resources expected to contain an ExtGState dictionary
		 * @throws IllegalArgumentException
		 *             if the resources have no ExtGState entry or the named
		 *             dictionary is not found in it
		 */
		@Override
		public void invoke(ArrayList operands,
				PdfContentStreamHandler handler, PdfDictionary resources) {
			PdfName wanted = (PdfName) operands.get(0);

			PdfDictionary extGStates = resources.getAsDict(PdfName.EXTGSTATE);
			if (extGStates == null) {
				String message = MessageLocalization.getComposedMessage(
						"resources.do.not.contain.extgstate.entry.unable.to.process.operator.1",
						getOperatorName());
				throw new IllegalArgumentException(message);
			}

			PdfDictionary state = extGStates.getAsDict(wanted);
			if (state == null) {
				String message = MessageLocalization.getComposedMessage(
						"1.is.an.unknown.graphics.state.dictionary", wanted);
				throw new IllegalArgumentException(message);
			}

			// everything except the FONT entry is ignored here
			PdfArray fontEntry = state.getAsArray(PdfName.FONT);
			if (fontEntry == null) {
				return;
			}
			PRIndirectReference fontRef = (PRIndirectReference) fontEntry
					.getPdfObject(0);
			CMapAwareDocumentFont newFont = new CMapAwareDocumentFont(fontRef);
			float newSize = fontEntry.getAsNumber(1).floatValue();

			GraphicsState current = handler.gs();
			current.font = newFont;
			current.fontSize = newSize;
		}
	}

	/**
	 * A content operator implementation (q): saves the current graphics
	 * state by pushing a copy of it.
	 */
	static class PushGraphicsState implements ContentOperator {
		/**
		 * @see com.lowagie.text.pdf.parser.ContentOperator#getOperatorName()
		 */
		@Override
		public String getOperatorName() {
			return "q";
		}

		/**
		 * Copy-constructs a {@link GraphicsState} from the one on top of the
		 * handler's stack and pushes the copy.
		 *
		 * @param operands
		 *            not used by this operator
		 * @param handler
		 *            the handler whose graphics state stack grows by one
		 * @param resources
		 *            not used by this operator
		 */
		@Override
		public void invoke(ArrayList operands,
				PdfContentStreamHandler handler, PdfDictionary resources) {
			handler.gsStack.push(new GraphicsState(handler.gs()));
		}
	}

	/**
	 * A content operator implementation (Tc): sets the character spacing.
	 */
	static class SetTextCharacterSpacing implements ContentOperator {
		/**
		 * @see com.lowagie.text.pdf.parser.ContentOperator#getOperatorName()
		 */
		@Override
		public String getOperatorName() {
			return "Tc";
		}

		/**
		 * Stores the float value of operand 0 as {@code characterSpacing} of
		 * the current graphics state.
		 *
		 * @param operands
		 *            operand list; element 0 must be a {@link PdfNumber}
		 * @param handler
		 *            the handler whose current graphics state is updated
		 * @param resources
		 *            not used by this operator
		 */
		@Override
		public void invoke(ArrayList operands,
				PdfContentStreamHandler handler, PdfDictionary resources) {
			float spacing = ((PdfNumber) operands.get(0)).floatValue();
			handler.gs().characterSpacing = spacing;
		}
	}

	/**
	 * A content operator implementation (Tf): selects font and font size.
	 */
	static class SetTextFont implements ContentOperator {
		/**
		 * @see com.lowagie.text.pdf.parser.ContentOperator#getOperatorName()
		 */
		@Override
		public String getOperatorName() {
			return "Tf";
		}

		/**
		 * Resolves the font named by operand 0 in the FONT dictionary of the
		 * resources and stores it, together with the size from operand 1, in
		 * the current graphics state.
		 *
		 * @param operands
		 *            operand list: font resource name, size
		 * @param handler
		 *            the handler whose current graphics state is updated
		 * @param resources
		 *            resources that must contain a FONT dictionary
		 */
		@Override
		public void invoke(ArrayList operands,
				PdfContentStreamHandler handler, PdfDictionary resources) {
			PdfName fontName = (PdfName) operands.get(0);
			float fontSize = ((PdfNumber) operands.get(1)).floatValue();

			PdfDictionary fonts = resources.getAsDict(PdfName.FONT);
			// NOTE(review): the entry is cast without a check, so a font
			// stored as a direct dictionary would fail here - confirm whether
			// such input has to be supported.
			PRIndirectReference fontRef = (PRIndirectReference) fonts
					.get(fontName);
			CMapAwareDocumentFont resolved = new CMapAwareDocumentFont(fontRef);

			GraphicsState current = handler.gs();
			current.font = resolved;
			current.fontSize = fontSize;
		}
	}

	/**
	 * A content operator implementation (Tm): replaces the text matrix and
	 * the text line matrix.
	 */
	static class TextSetTextMatrix implements ContentOperator {
		/**
		 * @see com.lowagie.text.pdf.parser.ContentOperator#getOperatorName()
		 */
		@Override
		public String getOperatorName() {
			return "Tm";
		}

		/**
		 * Builds one {@link Matrix} from six numeric operands and assigns it
		 * to both the text line matrix and the text matrix of the handler.
		 *
		 * @param operands
		 *            operand list; elements 0-5 must be {@link PdfNumber}s
		 * @param handler
		 *            the handler whose matrices are replaced
		 * @param resources
		 *            not used by this operator
		 */
		@Override
		public void invoke(ArrayList operands,
				PdfContentStreamHandler handler, PdfDictionary resources) {
			float[] v = new float[6];
			for (int k = 0; k < v.length; k++) {
				v[k] = ((PdfNumber) operands.get(k)).floatValue();
			}
			Matrix replacement = new Matrix(v[0], v[1], v[2], v[3], v[4], v[5]);
			handler.textLineMatrix = replacement;
			handler.textMatrix = replacement;
		}
	}

	/**
	 * A content operator implementation (TD): sets the leading to the
	 * negated vertical offset and then moves like Td, by delegating to the
	 * TL and Td operators it was given.
	 */
	static class TextMoveStartNextLineWithLeading implements ContentOperator {
		/** Delegate that performs the actual move. */
		private final PdfContentStreamHandler.TextMoveStartNextLine moveStartNextLine;

		/** Delegate that stores the leading. */
		private final PdfContentStreamHandler.SetTextLeading setTextLeading;

		/**
		 * @param moveStartNextLine
		 *            operator invoked second, with the original operands
		 * @param setTextLeading
		 *            operator invoked first, with the negated operand 1
		 */
		public TextMoveStartNextLineWithLeading(
				PdfContentStreamHandler.TextMoveStartNextLine moveStartNextLine,
				PdfContentStreamHandler.SetTextLeading setTextLeading) {
			this.moveStartNextLine = moveStartNextLine;
			this.setTextLeading = setTextLeading;
		}

		/**
		 * @see com.lowagie.text.pdf.parser.ContentOperator#getOperatorName()
		 */
		@Override
		public String getOperatorName() {
			return "TD";
		}

		/**
		 * Passes {@code -ty} (operand 1 negated) to the leading delegate and
		 * afterwards the unchanged operands to the move delegate.
		 *
		 * @param operands
		 *            operand list; element 1 must be a {@link PdfNumber}
		 * @param handler
		 *            forwarded to both delegates
		 * @param resources
		 *            forwarded to both delegates
		 */
		@Override
		public void invoke(ArrayList operands,
				PdfContentStreamHandler handler, PdfDictionary resources) {
			float verticalOffset = ((PdfNumber) operands.get(1)).floatValue();

			ArrayList leadingOperands = new ArrayList(1);
			leadingOperands.add(new PdfNumber(-verticalOffset));

			setTextLeading.invoke(leadingOperands, handler, resources);
			moveStartNextLine.invoke(operands, handler, resources);
		}
	}

	/**
	 * A content operator implementation (Tj): shows a single string.
	 */
	static class ShowText implements ContentOperator {
		/**
		 * @see com.lowagie.text.pdf.parser.ContentOperator#getOperatorName()
		 */
		@Override
		public String getOperatorName() {
			return "Tj";
		}

		/**
		 * Hands operand 0 to {@code handler.displayPdfString}.
		 *
		 * @param operands
		 *            operand list; element 0 must be a {@link PdfString}
		 * @param handler
		 *            the handler that displays the string
		 * @param resources
		 *            not used by this operator
		 */
		@Override
		public void invoke(ArrayList operands,
				PdfContentStreamHandler handler, PdfDictionary resources) {
			handler.displayPdfString((PdfString) operands.get(0));
		}
	}

	/**
	 * A content operator implementation (T*): moves to the start of the next
	 * line using the current leading, by delegating to the Td operator it
	 * was given.
	 */
	static class TextMoveNextLine implements ContentOperator {
		/** Delegate that performs the move. */
		private final TextMoveStartNextLine moveStartNextLine;

		/**
		 * @param moveStartNextLine
		 *            operator invoked with the synthesized offsets
		 */
		public TextMoveNextLine(TextMoveStartNextLine moveStartNextLine) {
			this.moveStartNextLine = moveStartNextLine;
		}

		/**
		 * @see com.lowagie.text.pdf.parser.ContentOperator#getOperatorName()
		 */
		@Override
		public String getOperatorName() {
			return "T*";
		}

		/**
		 * Invokes the move delegate with the operand pair
		 * {@code (0, -leading)}, where {@code leading} is read from the
		 * current graphics state.
		 *
		 * @param operands
		 *            not used by this operator
		 * @param handler
		 *            supplies the leading and is forwarded to the delegate
		 * @param resources
		 *            forwarded to the delegate
		 */
		@Override
		public void invoke(ArrayList operands,
				PdfContentStreamHandler handler, PdfDictionary resources) {
			ArrayList offsets = new ArrayList(2);
			offsets.add(new PdfNumber(0));
			offsets.add(new PdfNumber(-handler.gs().leading));
			moveStartNextLine.invoke(offsets, handler, resources);
		}
	}

	/**
	 * A content operator implementation (Td): moves to the start of the next
	 * line, offset from the start of the current line.
	 */
	static class TextMoveStartNextLine implements ContentOperator {
		/**
		 * @see com.lowagie.text.pdf.parser.ContentOperator#getOperatorName()
		 */
		@Override
		public String getOperatorName() {
			return "Td";
		}

		/**
		 * Builds a matrix from the two offsets, multiplies it with the
		 * current text line matrix and stores the product as both the text
		 * matrix and the text line matrix.
		 *
		 * @param operands
		 *            operand list; elements 0 and 1 must be
		 *            {@link PdfNumber}s (tx, ty)
		 * @param handler
		 *            the handler whose matrices are updated
		 * @param resources
		 *            not used by this operator
		 */
		@Override
		public void invoke(ArrayList operands,
				PdfContentStreamHandler handler, PdfDictionary resources) {
			float dx = ((PdfNumber) operands.get(0)).floatValue();
			float dy = ((PdfNumber) operands.get(1)).floatValue();

			Matrix moved = new Matrix(dx, dy).multiply(handler.textLineMatrix);
			handler.textMatrix = moved;
			handler.textLineMatrix = moved;
		}
	}

	/**
	 * A content operator implementation (Tr): sets the text render mode.
	 */
	static class SetTextRenderMode implements ContentOperator {
		/**
		 * @see com.lowagie.text.pdf.parser.ContentOperator#getOperatorName()
		 */
		@Override
		public String getOperatorName() {
			return "Tr";
		}

		/**
		 * Stores the int value of operand 0 as {@code renderMode} of the
		 * current graphics state.
		 *
		 * @param operands
		 *            operand list; element 0 must be a {@link PdfNumber}
		 * @param handler
		 *            the handler whose current graphics state is updated
		 * @param resources
		 *            not used by this operator
		 */
		@Override
		public void invoke(ArrayList operands,
				PdfContentStreamHandler handler, PdfDictionary resources) {
			int mode = ((PdfNumber) operands.get(0)).intValue();
			handler.gs().renderMode = mode;
		}
	}

	/**
	 * A content operator implementation (Ts): sets the text rise.
	 */
	static class SetTextRise implements ContentOperator {
		/**
		 * @see com.lowagie.text.pdf.parser.ContentOperator#getOperatorName()
		 */
		@Override
		public String getOperatorName() {
			return "Ts";
		}

		/**
		 * Stores the float value of operand 0 as {@code rise} of the current
		 * graphics state.
		 *
		 * @param operands
		 *            operand list; element 0 must be a {@link PdfNumber}
		 * @param handler
		 *            the handler whose current graphics state is updated
		 * @param resources
		 *            not used by this operator
		 */
		@Override
		public void invoke(ArrayList operands,
				PdfContentStreamHandler handler, PdfDictionary resources) {
			float amount = ((PdfNumber) operands.get(0)).floatValue();
			handler.gs().rise = amount;
		}
	}

	/**
	 * A content operator implementation (TL): sets the text leading.
	 */
	static class SetTextLeading implements ContentOperator {
		/**
		 * @see com.lowagie.text.pdf.parser.ContentOperator#getOperatorName()
		 */
		@Override
		public String getOperatorName() {
			return "TL";
		}

		/**
		 * Stores the float value of operand 0 as {@code leading} of the
		 * current graphics state.
		 *
		 * @param operands
		 *            operand list; element 0 must be a {@link PdfNumber}
		 * @param handler
		 *            the handler whose current graphics state is updated
		 * @param resources
		 *            not used by this operator
		 */
		@Override
		public void invoke(ArrayList operands,
				PdfContentStreamHandler handler, PdfDictionary resources) {
			float amount = ((PdfNumber) operands.get(0)).floatValue();
			handler.gs().leading = amount;
		}
	}

	/**
	 * A content operator implementation (Tz): sets the horizontal scaling.
	 */
	static class SetTextHorizontalScaling implements ContentOperator {
		/**
		 * @see com.lowagie.text.pdf.parser.ContentOperator#getOperatorName()
		 */
		@Override
		public String getOperatorName() {
			return "Tz";
		}

		/**
		 * Converts the percentage in operand 0 to a ratio and stores it as
		 * {@code horizontalScaling} of the current graphics state.
		 *
		 * @param operands
		 *            operand list; element 0 must be a {@link PdfNumber}
		 *            holding the scaling as a percentage (100 = normal)
		 * @param handler
		 *            the handler whose current graphics state is updated
		 * @param resources
		 *            not used by this operator
		 */
		@Override
		public void invoke(ArrayList operands,
				PdfContentStreamHandler handler, PdfDictionary resources) {
			PdfNumber scale = (PdfNumber) operands.get(0);
			// The Tz operand is a percentage, while applyTextAdjust uses
			// horizontalScaling as a plain multiplier; storing the raw
			// operand would stretch every adjustment by a factor of 100.
			handler.gs().horizontalScaling = scale.floatValue() / 100f;
		}
	}

	/**
	 * A content operator implementation (Tw): sets the word spacing.
	 */
	static class SetTextWordSpacing implements ContentOperator {
		/**
		 * @see com.lowagie.text.pdf.parser.ContentOperator#getOperatorName()
		 */
		@Override
		public String getOperatorName() {
			return "Tw";
		}

		/**
		 * Stores the float value of operand 0 as {@code wordSpacing} of the
		 * current graphics state.
		 *
		 * @param operands
		 *            operand list; element 0 must be a {@link PdfNumber}
		 * @param handler
		 *            the handler whose current graphics state is updated
		 * @param resources
		 *            not used by this operator
		 */
		@Override
		public void invoke(ArrayList operands,
				PdfContentStreamHandler handler, PdfDictionary resources) {
			float spacing = ((PdfNumber) operands.get(0)).floatValue();
			handler.gs().wordSpacing = spacing;
		}
	}

	Stack> textFragmentStreams = new Stack>();

	Stack contextNames = new Stack();

	Collection textFragments = new ArrayList();

	/** A map with all supported operators operators (PDF syntax). */
	public Map operators;

	/** Stack keeping track of the graphics state. */
	public Stack gsStack;

	/** Text matrix. */
	public Matrix textMatrix;

	/** Text line matrix. */
	public Matrix textLineMatrix;

	boolean useContainerMarkup;
	
	/**
	 * detail parser for text within a marked section. used by TextAssembler
	 */
	TextAssembler renderListener;

	/**
	 * Creates a handler that reports assembled text to the given listener,
	 * installs the default operators and puts the handler into its initial
	 * state.
	 *
	 * @param renderListener
	 *            assembler that receives the text fragments of each context
	 */
	public PdfContentStreamHandler(TextAssembler renderListener) {
		this.renderListener = renderListener;
		this.installDefaultOperators();
		this.reset();
	}

	/**
	 * Adds a content operator to the operator map under the name it reports
	 * through {@link ContentOperator#getOperatorName()}. A name can be
	 * registered only once.
	 * 
	 * @param operator
	 *            the operator to invoke whenever its name is encountered
	 *            during content processing
	 * @throws IllegalArgumentException
	 *             if an operator with the same name is already registered
	 * 
	 * @since 2.1.7
	 */
	public void registerContentOperator(ContentOperator operator) {
		String name = operator.getOperatorName();
		if (!operators.containsKey(name)) {
			operators.put(name, operator);
			return;
		}
		throw new IllegalArgumentException(
				MessageLocalization.getComposedMessage(
						"operator.1.already.registered", name));
	}

	/**
	 * A content operator implementation (BMC): opens a marked-content
	 * context named after the tag.
	 */
	private static class BeginMarked implements ContentOperator {
		/**
		 * @see com.lowagie.text.pdf.parser.ContentOperator#getOperatorName()
		 */
		@Override
		public String getOperatorName() {
			return "BMC";
		}

		/**
		 * Pushes a new context whose name is the lower-cased tag without its
		 * leading slash. For the tags "artifact" and "placedpdf" a
		 * {@code null} context is pushed instead, which makes the handler
		 * skip the text inside it.
		 *
		 * @param operands
		 *            operand list; element 0 must be a {@link PdfName}
		 * @param handler
		 *            the handler that receives the new context
		 * @param resources
		 *            not used by this operator
		 */
		@Override
		public void invoke(ArrayList operands,
				PdfContentStreamHandler handler, PdfDictionary resources) {
			PdfName tagName = (PdfName) operands.get(0);
			String realName = tagName.toString().substring(1).toLowerCase(Locale.ROOT);
			// Compare the normalized string, not the PdfName: a String never
			// equals a PdfName, so the old check could not match.
			if ("artifact".equals(realName) || "placedpdf".equals(realName)) {
				handler.pushContext(null);
			} else {
				handler.pushContext(realName);
			}
		}

	}

	/**
	 * A content operator implementation (BDC): opens a marked-content
	 * context that comes with a property dictionary.
	 */
	private static class BeginMarkedDict implements ContentOperator {
		/**
		 * @see com.lowagie.text.pdf.parser.ContentOperator#getOperatorName()
		 */
		@Override
		public String getOperatorName() {
			return "BDC";
		}

		/**
		 * Pushes a context for the tag in operand 0.
		 * <ul>
		 * <li>"artifact", "placedpdf" and any tag nested in a {@code null}
		 * context produce a {@code null} context;</li>
		 * <li>the tag "l" is renamed to "ul";</li>
		 * <li>if the property dictionary has an /E string, that string is
		 * emitted as the complete content of the element and the real content
		 * is suppressed through a {@code null} context;</li>
		 * <li>if the property dictionary has a /Type entry, the context name
		 * becomes the empty string.</li>
		 * </ul>
		 *
		 * @param operands
		 *            operand list: tag name, then a property name or an
		 *            inline dictionary
		 * @param handler
		 *            the handler that receives the new context
		 * @param resources
		 *            resources used to resolve a named property dictionary
		 */
		@Override
		public void invoke(ArrayList operands,
				PdfContentStreamHandler handler, PdfDictionary resources) {
			String tagName = ((PdfName) operands.get(0)).toString().substring(1)
					.toLowerCase(Locale.ROOT);
			if ("artifact".equals(tagName) || "placedpdf".equals(tagName)
					|| handler.contextNames.peek() == null) {
				tagName = null;
			} else if ("l".equals(tagName)) {
				tagName = "ul";
			}
			PdfDictionary attrs = getBDCDictionary(operands, resources);
			if (attrs != null && tagName != null) {
				PdfString alternateText = attrs.getAsString(PdfName.E);
				if (alternateText != null) {
					handler.pushContext(tagName);
					handler.textFragments
							.add(new FinalText(alternateText.toString()));
					handler.popContext();
					// ignore rest of the content of this element
					handler.pushContext(null);
					return;
				} else if (attrs.get(PdfName.TYPE) != null) {
					// ignore tag for non-tag marked content that sometimes
					// shows up.
					tagName = "";
				}
			}
			handler.pushContext(tagName);
		}

		/**
		 * Determines the property dictionary of a BDC operator.
		 *
		 * @param operands
		 *            operand list; element 1 is either a name to look up in
		 *            the /Properties resource dictionary or the dictionary
		 *            itself
		 * @param resources
		 *            resources of the stream being processed
		 * @return the property dictionary, or {@code null} if there is no
		 *         /Properties dictionary to resolve the name in or the
		 *         resolved object is not a dictionary
		 */
		private PdfDictionary getBDCDictionary(ArrayList operands,
				PdfDictionary resources) {
			PdfObject o = (PdfObject) operands.get(1);
			if (o.isName()) {
				PdfDictionary properties = resources
						.getAsDict(PdfName.PROPERTIES);
				if (properties == null) {
					// nothing to resolve the name against
					return null;
				}
				PdfIndirectReference ir = properties
						.getAsIndirectObject((PdfName) o);
				if (ir != null) {
					// NOTE(review): getIndRef() is kept as in the original;
					// confirm that it is the intended way to obtain the
					// referenced dictionary.
					o = ir.getIndRef();
				} else {
					o = properties.getAsDict((PdfName) o);
				}
			}
			// guard the cast: the operand may be neither name nor dictionary
			return o instanceof PdfDictionary ? (PdfDictionary) o : null;
		}
	}

	/**
	 * A content operator implementation (EMC): closes the innermost
	 * marked-content context.
	 */
	private static class EndMarked implements ContentOperator {
		/**
		 * @see com.lowagie.text.pdf.parser.ContentOperator#getOperatorName()
		 */
		@Override
		public String getOperatorName() {
			return "EMC";
		}

		/**
		 * Delegates to {@code handler.popContext()}.
		 *
		 * @param operands
		 *            not used by this operator
		 * @param handler
		 *            the handler whose innermost context is closed
		 * @param resources
		 *            not used by this operator
		 */
		@Override
		public void invoke(ArrayList operands,
				PdfContentStreamHandler handler, PdfDictionary resources) {
			handler.popContext();
		}
	}
	
	/**
	 * A content operator implementation (Do): processes the content stream
	 * of a form XObject. XObjects of any other subtype are ignored.
	 */
	private class Do implements ContentOperator {
		/**
		 * @see com.lowagie.text.pdf.parser.ContentOperator#getOperatorName()
		 */
		@Override
		public String getOperatorName() {
			return "Do";
		}

		/**
		 * Looks up the XObject named by operand 0 and, if it is a form,
		 * runs its content through the handler between a push and a pop of
		 * the graphics state.
		 * <p>
		 * The operator does nothing when operand 0 is not a name, when the
		 * resources have no XObject dictionary, or when the named entry is
		 * missing, is not a stream, or is not a form.
		 *
		 * @param operands
		 *            operand list; element 0 is the XObject name
		 * @param handler
		 *            the handler that processes the form content
		 * @param resources
		 *            resources of the stream that contains the operator
		 */
		@Override
		public void invoke(ArrayList operands, PdfContentStreamHandler handler, PdfDictionary resources) {
			Object firstOperand = operands.get(0);
			if (!(firstOperand instanceof PdfName)) {
				return;
			}
			PdfDictionary xObjects = resources.getAsDict(PdfName.XOBJECT);
			if (xObjects == null) {
				return;
			}
			PdfObject target = xObjects.getDirectObject((PdfName) firstOperand);
			if (!(target instanceof PdfStream)) {
				// unknown name or malformed entry: nothing to render
				return;
			}
			PdfStream stream = (PdfStream) target;
			if (!PdfName.FORM.equals(stream.getAsName(PdfName.SUBTYPE))) {
				return;
			}
			PdfDictionary formResources = stream.getAsDict(PdfName.RESOURCES);
			if (formResources == null) {
				// a form without its own resources uses those of the
				// content stream that invokes it
				formResources = resources;
			}
			byte[] data;
			try {
				data = getContentBytesFromPdfObject(stream);
			} catch (IOException ex) {
				throw new ExceptionConverter(ex);
			}
			new PushGraphicsState().invoke(operands, handler, resources);
			processContent(data, formResources, handler);
			new PopGraphicsState().invoke(operands, handler, resources);
		}

		/**
		 * Parses a content stream and dispatches every operator to the given
		 * handler.
		 *
		 * @param contentBytes
		 *            decoded bytes of the content stream
		 * @param resources
		 *            resources passed along with every operator
		 * @param handler
		 *            the handler that invokes the operators
		 */
		private void processContent(byte[] contentBytes, PdfDictionary resources,
				PdfContentStreamHandler handler) {
			try {
				PdfContentParser ps = new PdfContentParser(new PRTokeniser(contentBytes));
				ArrayList operands = new ArrayList();
				while (ps.parse(operands).size() > 0) {
					// the parser leaves the operator as the last list element
					PdfLiteral operator = (PdfLiteral) operands.get(operands.size() - 1);
					handler.invokeOperator(operator, operands, resources);
				}
			} catch (Exception e) {
				throw new ExceptionConverter(e);
			}
		}

		/**
		 * Collects the content bytes of a stream, of an indirect reference to
		 * one, or of an array of such objects (concatenated in array order).
		 *
		 * @param object
		 *            the object to read
		 * @return the content bytes
		 * @throws IOException
		 *             if reading a stream fails
		 * @throws IllegalStateException
		 *             if the object has any other type
		 */
		private byte[] getContentBytesFromPdfObject(PdfObject object) throws IOException {
			switch (object.type()) {
			case PdfObject.INDIRECT:
				return getContentBytesFromPdfObject(PdfReader.getPdfObject(object));
			case PdfObject.STREAM:
				return PdfReader.getStreamBytes((PRStream) PdfReader.getPdfObject(object));
			case PdfObject.ARRAY: {
				ByteArrayOutputStream baos = new ByteArrayOutputStream();
				ListIterator iter = ((PdfArray) object).listIterator();
				while (iter.hasNext()) {
					PdfObject element = (PdfObject) iter.next();
					baos.write(getContentBytesFromPdfObject(element));
				}
				return baos.toByteArray();
			}
			default:
				throw new IllegalStateException("Unsupported type: " + object.getClass().getCanonicalName());
			}
		}
	}

	/**
	 * Creates the operator map and registers all operators supported by this
	 * handler: graphics state, text state, text positioning, text showing,
	 * marked content and XObject invocation.
	 * <p>
	 * Composite operators ({@code TD}, {@code T*}, {@code '}, {@code "}) are
	 * built from the very instances that are registered for the operators
	 * they delegate to.
	 */
	protected void installDefaultOperators() {
		operators = new HashMap<String, ContentOperator>();

		// graphics state
		registerContentOperator(new PushGraphicsState());
		registerContentOperator(new PopGraphicsState());
		registerContentOperator(new ModifyCurrentTransformationMatrix());
		registerContentOperator(new ProcessGraphicsStateResource());

		// text state
		SetTextCharacterSpacing tcOperator = new SetTextCharacterSpacing();
		registerContentOperator(tcOperator);
		SetTextWordSpacing twOperator = new SetTextWordSpacing();
		registerContentOperator(twOperator);
		registerContentOperator(new SetTextHorizontalScaling());
		SetTextLeading tlOperator = new SetTextLeading();
		registerContentOperator(tlOperator);
		registerContentOperator(new SetTextFont());
		registerContentOperator(new SetTextRenderMode());
		registerContentOperator(new SetTextRise());

		// text objects
		registerContentOperator(new BeginText());
		registerContentOperator(new EndText());

		// text positioning
		TextMoveStartNextLine tdOperator = new TextMoveStartNextLine();
		registerContentOperator(tdOperator);
		registerContentOperator(new TextMoveStartNextLineWithLeading(
				tdOperator, tlOperator));
		registerContentOperator(new TextSetTextMatrix());
		TextMoveNextLine tstarOperator = new TextMoveNextLine(tdOperator);
		registerContentOperator(tstarOperator);

		// text showing; register the same Tj instance the ' operator uses
		ShowText tjOperator = new ShowText();
		registerContentOperator(tjOperator);
		MoveNextLineAndShowText tickOperator = new MoveNextLineAndShowText(
				tstarOperator, tjOperator);
		registerContentOperator(tickOperator);
		registerContentOperator(new MoveNextLineAndShowTextWithSpacing(
				twOperator, tcOperator, tickOperator));
		registerContentOperator(new ShowTextArray());

		// marked sections
		registerContentOperator(new BeginMarked());
		registerContentOperator(new BeginMarkedDict());
		registerContentOperator(new EndMarked());

		// XObjects
		registerContentOperator(new Do());
	}

	/**
	 * Finds the operator registered under the given name.
	 * 
	 * @param operatorName
	 *            name of the operator that we might need to call
	 * 
	 * @return the registered operator, or {@code null} if the name is not
	 *         present in the operator map
	 */
	public ContentOperator lookupOperator(String operatorName) {
		ContentOperator found = (ContentOperator) operators.get(operatorName);
		return found;
	}

	/**
	 * Dispatches one operator of a content stream. Operators that are not
	 * registered are skipped without any effect.
	 * 
	 * @param operator
	 *            the PDF syntax of the operator
	 * @param operands
	 *            a list with operands
	 * @param resources
	 *            Pdf Resources found in the file containing the stream.
	 */
	public void invokeOperator(PdfLiteral operator,
			ArrayList operands, PdfDictionary resources) {
		ContentOperator registered = lookupOperator(operator.toString());
		if (registered != null) {
			registered.invoke(operands, this, resources);
		}
	}

	/**
	 * Closes the innermost context: feeds its collected fragments to the
	 * render listener, and appends the listener's result (if it has any
	 * text) to the buffer of the enclosing context, which becomes the
	 * current buffer again.
	 */
	void popContext() {
		String closingName = contextNames.pop();
		Collection<TextAssemblyBuffer> parentBuffer = textFragmentStreams.pop();

		// assemble the still unparsed fragments of the closing context
		renderListener.reset();
		Iterator<TextAssemblyBuffer> pending = textFragments.iterator();
		while (pending.hasNext()) {
			pending.next().accumulate(renderListener, closingName);
		}
		FinalText assembled = renderListener.endParsingContext(closingName);

		boolean hasText = assembled != null
				&& assembled.getText().length() > 0;
		if (hasText) {
			parentBuffer.add(assembled);
		}
		textFragments = parentBuffer;
	}

	/**
	 * Opens a new context: remembers its name, saves the current fragment
	 * buffer and starts collecting into a new, empty one.
	 *
	 * @param newContextName
	 *            name of the context; may be {@code null}
	 */
	void pushContext(String newContextName) {
		contextNames.push(newContextName);
		textFragmentStreams.push(textFragments);
		textFragments = new ArrayList<TextAssemblyBuffer>();
	}

	/**
	 * Gives access to the graphics state on top of the stack without
	 * removing it.
	 * 
	 * @return the current graphics state
	 */
	GraphicsState gs() {
		GraphicsState top = (GraphicsState) gsStack.peek();
		return top;
	}

	/**
	 * Prepares the handler for processing: makes sure a graphics state stack
	 * exists, adds a newly constructed {@link GraphicsState} on top of it and
	 * clears the text matrix and the text line matrix.
	 * <p>
	 * An existing, non-empty stack is kept; the new state is added above its
	 * current entries.
	 */
	public void reset() {
		boolean needsNewStack = gsStack == null || gsStack.isEmpty();
		if (needsNewStack) {
			gsStack = new Stack<GraphicsState>();
		}
		gsStack.add(new GraphicsState());
		textLineMatrix = null;
		textMatrix = null;
	}

	/**
	 * Gives access to the text matrix as it currently stands.
	 * 
	 * @return the text matrix; {@code null} after {@link #reset()} or after
	 *         an ET operator
	 * @since 2.1.5
	 */
	protected Matrix getCurrentTextMatrix() {
		return this.textMatrix;
	}

	/**
	 * Gives access to the text line matrix as it currently stands.
	 * 
	 * @return the text line matrix; {@code null} after {@link #reset()} or
	 *         after an ET operator
	 * @since 2.1.5
	 */
	protected Matrix getCurrentTextLineMatrix() {
		return this.textLineMatrix;
	}

	/**
	 * Shifts the text matrix horizontally by a TJ adjustment value (see the
	 * TJ operator in the PDF spec). The shift is
	 * {@code -tj / 1000 * fontSize * horizontalScaling}, taken from the
	 * current graphics state.
	 * 
	 * @param tj
	 *            the text adjustment
	 */
	void applyTextAdjust(float tj) {
		GraphicsState state = gs();
		float shift = -tj / 1000f * state.fontSize * state.horizontalScaling;
		Matrix translation = new Matrix(shift, 0);
		textMatrix = translation.multiply(textMatrix);
	}

	/**
	 * Reads the font of the graphics state on top of the stack.
	 *
	 * @return current font in processing state
	 */
	public CMapAwareDocumentFont getCurrentFont() {
		GraphicsState state = gs();
		return state.font;
	}

	/**
	 * Displays text: records the string as a fragment of the current context
	 * (unless that context is a {@code null} context) and advances the text
	 * matrix by the unscaled width of the string.
	 * 
	 * @param string
	 *            the text to display
	 */
	void displayPdfString(PdfString string) {
		ParsedText parsed = new ParsedText(string, gs(), textMatrix);
		boolean collecting = contextNames.peek() != null;
		if (collecting) {
			textFragments.add(parsed);
		}
		float advance = parsed.getUnscaledTextWidth(gs());
		textMatrix = new Matrix(advance, 0).multiply(textMatrix);
	}

	/**
	 * Concatenates the text of all fragments in the current buffer.
	 *
	 * @return the text of all collected fragments, in collection order
	 * @throws RuntimeException
	 *             if any context is still open
	 */
	public String getResultantText() {
		if (!contextNames.isEmpty()) {
			throw new RuntimeException(
					"can't get text with unprocessed stack items");
		}
		StringBuilder joined = new StringBuilder();
		Iterator<TextAssemblyBuffer> parts = textFragments.iterator();
		while (parts.hasNext()) {
			joined.append(parts.next().getText());
		}
		return joined.toString();
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy