// com.aowagie.text.pdf.PdfContentParser (Maven / Gradle / Ivy)
/*
* $Id: PdfContentParser.java 3117 2008-01-31 05:53:22Z xlv $
*
* Copyright 2005 by Paulo Soares.
*
* The contents of this file are subject to the Mozilla Public License Version 1.1
* (the "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the License.
*
* The Original Code is 'iText, a free JAVA-PDF library'.
*
* The Initial Developer of the Original Code is Bruno Lowagie. Portions created by
* the Initial Developer are Copyright (C) 1999, 2000, 2001, 2002 by Bruno Lowagie.
* All Rights Reserved.
* Co-Developer of the code is Paulo Soares. Portions created by the Co-Developer
* are Copyright (C) 2000, 2001, 2002 by Paulo Soares. All Rights Reserved.
*
* Contributor(s): all the names of the contributors are added in the source code
* where applicable.
*
* Alternatively, the contents of this file may be used under the terms of the
* LGPL license (the "GNU LIBRARY GENERAL PUBLIC LICENSE"), in which case the
* provisions of LGPL are applicable instead of those above. If you wish to
* allow use of your version of this file only under the terms of the LGPL
* License and not to allow others to use your version of this file under
* the MPL, indicate your decision by deleting the provisions above and
* replace them with the notice and other provisions required by the LGPL.
* If you do not delete the provisions above, a recipient may use your version
* of this file under either the MPL or the GNU LIBRARY GENERAL PUBLIC LICENSE.
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the MPL as stated above or under the terms of the GNU
* Library General Public License as published by the Free Software Foundation;
* either version 2 of the License, or any later version.
*
* This library is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU Library general Public License for more
* details.
*
* If you didn't download this code from the following link, you should check if
* you aren't using an obsolete version:
* http://www.lowagie.com/iText/
*/
package com.aowagie.text.pdf;
import java.io.IOException;
import java.util.ArrayList;
/**
* Parses the page or template content.
* @author Paulo Soares
*/
public class PdfContentParser {

    /**
     * Commands have this type.
     */
    static final int COMMAND_TYPE = 200;

    /**
     * Message used whenever the content ends in the middle of a composite
     * object (dictionary or array).
     */
    private static final String UNEXPECTED_EOF = "Unexpected end of file.";

    /**
     * Holds value of property tokeniser.
     */
    private PRTokeniser tokeniser;

    /**
     * Creates a new instance of PdfContentParser.
     * @param tokeniser the tokeniser with the content
     */
    public PdfContentParser(final PRTokeniser tokeniser) {
        this.tokeniser = tokeniser;
    }

    /**
     * Parses a single command from the content. Each command is output as a list of
     * arguments having the command itself as the last element. Objects are read until
     * either a command (an object of type {@link #COMMAND_TYPE}) is found or the end of
     * the content is reached. The returned list is empty only if the end of the content
     * was reached before any object could be read.
     * @param ls an <CODE>ArrayList</CODE> to use. It will be cleared before using. If it's
     * <CODE>null</CODE> a new <CODE>ArrayList</CODE> will be created
     * @return the same <CODE>ArrayList</CODE> given as argument or a new one
     * @throws IOException on error
     */
    public ArrayList parse(ArrayList ls) throws IOException {
        if (ls == null) {
            ls = new ArrayList();
        } else {
            ls.clear();
        }
        PdfObject ob;
        while ((ob = readPRObject()) != null) {
            ls.add(ob);
            // The command (operator) closes the operand list.
            if (ob.type() == COMMAND_TYPE) {
                break;
            }
        }
        return ls;
    }

    /**
     * Gets the tokeniser.
     * @return the tokeniser.
     */
    public PRTokeniser getTokeniser() {
        return this.tokeniser;
    }

    /**
     * Sets the tokeniser.
     * @param tokeniser the tokeniser
     */
    public void setTokeniser(final PRTokeniser tokeniser) {
        this.tokeniser = tokeniser;
    }

    /**
     * Reads a dictionary. The tokeniser must be positioned past the "<<" token.
     * @return the dictionary
     * @throws IOException if the content ends before the dictionary is closed, if a key
     * is not a name, or if a closing token appears where a value is expected
     */
    private PdfDictionary readDictionary() throws IOException {
        final PdfDictionary dic = new PdfDictionary();
        while (true) {
            if (!nextValidToken()) {
                throw new IOException(UNEXPECTED_EOF);
            }
            final int keyTokenType = this.tokeniser.getTokenType();
            if (keyTokenType == PRTokeniser.TK_END_DIC) {
                break;
            }
            if (keyTokenType != PRTokeniser.TK_NAME) {
                throw new IOException("Dictionary key is not a name.");
            }
            final PdfName name = new PdfName(this.tokeniser.getStringValue(), false);
            final PdfObject obj = readPRObject();
            if (obj == null) {
                // readPRObject() signals end of content with null; a key without a
                // value means the content was truncated.
                throw new IOException(UNEXPECTED_EOF);
            }
            // readPRObject() reports unhandled tokens as literals typed with the
            // negated token type, hence the comparison against -type.
            final int type = obj.type();
            if (-type == PRTokeniser.TK_END_DIC) {
                throw new IOException("Unexpected '>>'");
            }
            if (-type == PRTokeniser.TK_END_ARRAY) {
                throw new IOException("Unexpected ']'");
            }
            dic.put(name, obj);
        }
        return dic;
    }

    /**
     * Reads an array. The tokeniser must be positioned past the "[" token.
     * @return an array
     * @throws IOException if the content ends before the array is closed or if a
     * dictionary closing token appears inside the array
     */
    private PdfArray readArray() throws IOException {
        final PdfArray array = new PdfArray();
        while (true) {
            final PdfObject obj = readPRObject();
            if (obj == null) {
                // End of content before the closing "]".
                throw new IOException(UNEXPECTED_EOF);
            }
            // Unhandled tokens come back typed with the negated token type.
            final int type = obj.type();
            if (-type == PRTokeniser.TK_END_ARRAY) {
                break;
            }
            if (-type == PRTokeniser.TK_END_DIC) {
                throw new IOException("Unexpected '>>'");
            }
            array.add(obj);
        }
        return array;
    }

    /**
     * Reads a pdf object. Dictionaries and arrays are read recursively. Tokens of type
     * <CODE>TK_OTHER</CODE> are returned as literals of type {@link #COMMAND_TYPE}; any
     * token type not handled explicitly is returned as a literal whose type is the
     * negated token type.
     * @return the pdf object, or <CODE>null</CODE> if the end of content was reached
     * @throws IOException on error
     */
    PdfObject readPRObject() throws IOException {
        if (!nextValidToken()) {
            return null;
        }
        final int type = this.tokeniser.getTokenType();
        switch (type) {
            case PRTokeniser.TK_START_DIC:
                return readDictionary();
            case PRTokeniser.TK_START_ARRAY:
                return readArray();
            case PRTokeniser.TK_STRING:
                return new PdfString(this.tokeniser.getStringValue(), null)
                        .setHexWriting(this.tokeniser.isHexString());
            case PRTokeniser.TK_NAME:
                return new PdfName(this.tokeniser.getStringValue(), false);
            case PRTokeniser.TK_NUMBER:
                return new PdfNumber(this.tokeniser.getStringValue());
            case PRTokeniser.TK_OTHER:
                return new PdfLiteral(COMMAND_TYPE, this.tokeniser.getStringValue());
            default:
                // Negating the token type lets readDictionary()/readArray() recognise
                // closing tokens through the returned object's type.
                return new PdfLiteral(-type, this.tokeniser.getStringValue());
        }
    }

    /**
     * Reads the next token skipping over the comments.
     * @return <CODE>true</CODE> if a token was read, <CODE>false</CODE> if the end of
     * content was reached
     * @throws IOException on error
     */
    private boolean nextValidToken() throws IOException {
        while (this.tokeniser.nextToken()) {
            if (this.tokeniser.getTokenType() != PRTokeniser.TK_COMMENT) {
                return true;
            }
        }
        return false;
    }
}