com.itextpdf.kernel.pdf.PdfReader Maven / Gradle / Ivy
/*
This file is part of the iText (R) project.
Copyright (c) 1998-2022 iText Group NV
Authors: Bruno Lowagie, Paulo Soares, et al.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License version 3
as published by the Free Software Foundation with the addition of the
following permission added to Section 15 as permitted in Section 7(a):
FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
ITEXT GROUP. ITEXT GROUP DISCLAIMS THE WARRANTY OF NON INFRINGEMENT
OF THIRD PARTY RIGHTS
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.
See the GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program; if not, see http://www.gnu.org/licenses or write to
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
Boston, MA, 02110-1301 USA, or download the license from the following URL:
http://itextpdf.com/terms-of-use/
The interactive user interfaces in modified source and object code versions
of this program must display Appropriate Legal Notices, as required under
Section 5 of the GNU Affero General Public License.
In accordance with Section 7(b) of the GNU Affero General Public License,
a covered work must retain the producer line in every PDF that is created
or manipulated using iText.
You can be released from the requirements of the license by purchasing
a commercial license. Buying such a license is mandatory as soon as you
develop commercial activities involving the iText software without
disclosing the source code of your own applications.
These activities include: offering paid services to customers as an ASP,
serving PDFs on the fly in a web application, shipping iText with a closed
source product.
For more information, please contact iText Software Corp. at this
address: [email protected]
*/
package com.itextpdf.kernel.pdf;
import com.itextpdf.io.logs.IoLogMessageConstant;
import com.itextpdf.io.source.ByteBuffer;
import com.itextpdf.io.source.ByteUtils;
import com.itextpdf.io.source.IRandomAccessSource;
import com.itextpdf.io.source.PdfTokenizer;
import com.itextpdf.io.source.RASInputStream;
import com.itextpdf.io.source.RandomAccessFileOrArray;
import com.itextpdf.io.source.RandomAccessSourceFactory;
import com.itextpdf.io.source.WindowRandomAccessSource;
import com.itextpdf.commons.utils.MessageFormatUtil;
import com.itextpdf.kernel.exceptions.InvalidXRefPrevException;
import com.itextpdf.kernel.exceptions.MemoryLimitsAwareException;
import com.itextpdf.kernel.exceptions.PdfException;
import com.itextpdf.kernel.crypto.securityhandler.UnsupportedSecurityHandlerException;
import com.itextpdf.kernel.exceptions.KernelExceptionMessageConstant;
import com.itextpdf.kernel.exceptions.XrefCycledReferencesException;
import com.itextpdf.kernel.pdf.filters.FilterHandlers;
import com.itextpdf.kernel.pdf.filters.IFilterHandler;
import java.io.ByteArrayInputStream;
import java.io.Closeable;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashSet;
import java.nio.charset.StandardCharsets;
import java.util.Map;
import com.itextpdf.kernel.xmp.XMPException;
import com.itextpdf.kernel.xmp.XMPMetaFactory;
import java.util.Set;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Reads a PDF document.
*/
public class PdfReader implements Closeable {
/**
* The default {@link StrictnessLevel} to be used.
*/
public static final StrictnessLevel DEFAULT_STRICTNESS_LEVEL = StrictnessLevel.LENIENT;
private static final String endstream1 = "endstream";
private static final String endstream2 = "\nendstream";
private static final String endstream3 = "\r\nendstream";
private static final String endstream4 = "\rendstream";
private static final byte[] endstream = ByteUtils.getIsoBytes("endstream");
private static final byte[] endobj = ByteUtils.getIsoBytes("endobj");
protected static boolean correctStreamLength = true;
private boolean unethicalReading;
private boolean memorySavingMode;
private StrictnessLevel strictnessLevel = DEFAULT_STRICTNESS_LEVEL;
//indicate nearest first Indirect reference object which includes current reading the object, using for PdfString decrypt
private PdfIndirectReference currentIndirectReference;
protected PdfTokenizer tokens;
protected PdfEncryption decrypt;
// here we store only the pdfVersion that is written in the document's header,
// however it could differ from the actual pdf version that could be written in document's catalog
protected PdfVersion headerPdfVersion;
protected long lastXref;
protected long eofPos;
protected PdfDictionary trailer;
protected PdfDocument pdfDocument;
protected PdfAConformanceLevel pdfAConformanceLevel;
protected ReaderProperties properties;
protected boolean encrypted = false;
protected boolean rebuiltXref = false;
protected boolean hybridXref = false;
protected boolean fixedXref = false;
protected boolean xrefStm = false;
/**
* Constructs a new PdfReader.
*
* @param byteSource source of bytes for the reader
* @param properties properties of the created reader
* @throws IOException if an I/O error occurs
*/
public PdfReader(IRandomAccessSource byteSource, ReaderProperties properties) throws IOException {
this(byteSource, properties, false);
}
/**
* Reads and parses a PDF document.
*
* @param is the {@code InputStream} containing the document. If the inputStream is an instance of
* {@link RASInputStream} then the {@link IRandomAccessSource} would be extracted. Otherwise the stream
* is read to the end but is not closed.
* @param properties properties of the created reader
*
* @throws IOException on error
*/
public PdfReader(InputStream is, ReaderProperties properties) throws IOException {
this(new RandomAccessSourceFactory().extractOrCreateSource(is), properties, true);
}
/**
* Reads and parses a PDF document.
*
* @param file the {@code File} containing the document.
* @throws IOException on error
* @throws FileNotFoundException when the specified File is not found
*/
public PdfReader(java.io.File file) throws FileNotFoundException, IOException {
this(file.getAbsolutePath());
}
/**
* Reads and parses a PDF document.
*
* @param is the {@code InputStream} containing the document. If the inputStream is an instance of
* {@link RASInputStream} then the {@link IRandomAccessSource} would be extracted. Otherwise the stream
* is read to the end but is not closed.
*
* @throws IOException on error
*/
public PdfReader(InputStream is) throws IOException {
this(is, new ReaderProperties());
}
/**
* Reads and parses a PDF document.
*
* @param filename the file name of the document
* @param properties properties of the created reader
* @throws IOException on error
*/
public PdfReader(String filename, ReaderProperties properties) throws IOException {
this(
new RandomAccessSourceFactory()
.setForceRead(false)
.createBestSource(filename),
properties,
true
);
}
/**
* Reads and parses a PDF document.
*
* @param filename the file name of the document
* @throws IOException on error
*/
public PdfReader(String filename) throws IOException {
this(filename, new ReaderProperties());
}
PdfReader(IRandomAccessSource byteSource, ReaderProperties properties, boolean closeStream) throws IOException {
this.properties = properties;
this.tokens = getOffsetTokeniser(byteSource, closeStream);
}
/**
* Close {@link PdfTokenizer}.
*
* @throws IOException on error.
*/
public void close() throws IOException {
tokens.close();
}
/**
* The iText is not responsible if you decide to change the
* value of this parameter.
*
* @param unethicalReading true to enable unethicalReading, false to disable it.
* By default unethicalReading is disabled.
* @return this {@link PdfReader} instance.
*/
public PdfReader setUnethicalReading(boolean unethicalReading) {
this.unethicalReading = unethicalReading;
return this;
}
/**
* Defines if memory saving mode is enabled.
*
* By default memory saving mode is disabled for the sake of time–memory trade-off.
*
* If memory saving mode is enabled, document processing might slow down, but reading will be less memory demanding.
*
* @param memorySavingMode true to enable memory saving mode, false to disable it.
* @return this {@link PdfReader} instance.
*/
public PdfReader setMemorySavingMode(boolean memorySavingMode) {
this.memorySavingMode = memorySavingMode;
return this;
}
/**
* Get the current {@link StrictnessLevel} of the reader.
*
* @return the current {@link StrictnessLevel}
*/
public StrictnessLevel getStrictnessLevel() {
return strictnessLevel;
}
/**
* Set the {@link StrictnessLevel} for the reader. If the argument is {@code null}, then
* the {@link PdfReader#DEFAULT_STRICTNESS_LEVEL} will be used.
*
* @param strictnessLevel the {@link StrictnessLevel} to set
*
* @return this {@link PdfReader} instance
*/
public PdfReader setStrictnessLevel(StrictnessLevel strictnessLevel) {
this.strictnessLevel = strictnessLevel == null ? DEFAULT_STRICTNESS_LEVEL : strictnessLevel;
return this;
}
/**
* Gets whether {@link #close()} method shall close input stream.
*
* @return true, if {@link #close()} method will close input stream,
* otherwise false.
*/
public boolean isCloseStream() {
return tokens.isCloseStream();
}
/**
* Sets whether {@link #close()} method shall close input stream.
*
* @param closeStream true, if {@link #close()} method shall close input stream,
* otherwise false.
*/
public void setCloseStream(boolean closeStream) {
tokens.setCloseStream(closeStream);
}
/**
* If any exception generated while reading XRef section, PdfReader will try to rebuild it.
*
* @return true, if PdfReader rebuilt Cross-Reference section.
* @throws PdfException if the method has been invoked before the PDF document was read.
*/
public boolean hasRebuiltXref() {
if (pdfDocument == null || !pdfDocument.getXref().isReadingCompleted()) {
throw new PdfException(KernelExceptionMessageConstant.DOCUMENT_HAS_NOT_BEEN_READ_YET);
}
return rebuiltXref;
}
/**
* Some documents contain hybrid XRef, for more information see "7.5.8.4 Compatibility with Applications
* That Do Not Support Compressed Reference Streams" in PDF 32000-1:2008 spec.
*
* @return true, if the document has hybrid Cross-Reference section.
* @throws PdfException if the method has been invoked before the PDF document was read.
*/
public boolean hasHybridXref() {
if (pdfDocument == null || !pdfDocument.getXref().isReadingCompleted()) {
throw new PdfException(KernelExceptionMessageConstant.DOCUMENT_HAS_NOT_BEEN_READ_YET);
}
return hybridXref;
}
/**
* Indicates whether the document has Cross-Reference Streams.
*
* @return true, if the document has Cross-Reference Streams.
* @throws PdfException if the method has been invoked before the PDF document was read.
*/
public boolean hasXrefStm() {
if (pdfDocument == null || !pdfDocument.getXref().isReadingCompleted()) {
throw new PdfException(KernelExceptionMessageConstant.DOCUMENT_HAS_NOT_BEEN_READ_YET);
}
return xrefStm;
}
/**
* If any exception generated while reading PdfObject, PdfReader will try to fix offsets of all objects.
*
* This method's returned value might change over time, because PdfObjects reading
* can be postponed even up to document closing.
* @return true, if PdfReader fixed offsets of PdfObjects.
* @throws PdfException if the method has been invoked before the PDF document was read.
*/
public boolean hasFixedXref() {
if (pdfDocument == null || !pdfDocument.getXref().isReadingCompleted()) {
throw new PdfException(KernelExceptionMessageConstant.DOCUMENT_HAS_NOT_BEEN_READ_YET);
}
return fixedXref;
}
/**
* Gets position of the last Cross-Reference table.
*
* @return -1 if Cross-Reference table has rebuilt, otherwise position of the last Cross-Reference table.
* @throws PdfException if the method has been invoked before the PDF document was read.
*/
public long getLastXref() {
if (pdfDocument == null || !pdfDocument.getXref().isReadingCompleted()) {
throw new PdfException(KernelExceptionMessageConstant.DOCUMENT_HAS_NOT_BEEN_READ_YET);
}
return lastXref;
}
/**
* Reads, decrypt and optionally decode stream bytes.
* Note, this method doesn't store actual bytes in any internal structures.
*
* @param stream a {@link PdfStream} stream instance to be read and optionally decoded.
* @param decode true if to get decoded stream bytes, false if to leave it originally encoded.
* @return byte[] array.
* @throws IOException on error.
*/
public byte[] readStreamBytes(PdfStream stream, boolean decode) throws IOException {
byte[] b = readStreamBytesRaw(stream);
if (decode && b != null) {
return decodeBytes(b, stream);
} else {
return b;
}
}
/**
* Reads and decrypt stream bytes.
* Note, this method doesn't store actual bytes in any internal structures.
*
* @param stream a {@link PdfStream} stream instance to be read
* @return byte[] array.
* @throws IOException on error.
*/
public byte[] readStreamBytesRaw(PdfStream stream) throws IOException {
PdfName type = stream.getAsName(PdfName.Type);
if (!PdfName.XRef.equals(type) && !PdfName.ObjStm.equals(type)) {
checkPdfStreamLength(stream);
}
long offset = stream.getOffset();
if (offset <= 0)
return null;
int length = stream.getLength();
if (length <= 0)
return new byte[0];
RandomAccessFileOrArray file = tokens.getSafeFile();
byte[] bytes = null;
try {
file.seek(offset);
bytes = new byte[length];
file.readFully(bytes);
boolean embeddedStream = pdfDocument.doesStreamBelongToEmbeddedFile(stream);
if (decrypt != null && (!decrypt.isEmbeddedFilesOnly() || embeddedStream)) {
PdfObject filter = stream.get(PdfName.Filter, true);
boolean skip = false;
if (filter != null) {
if (filter.isFlushed()) {
IndirectFilterUtils.throwFlushedFilterException(stream);
}
if (PdfName.Crypt.equals(filter)) {
skip = true;
} else if (filter.getType() == PdfObject.ARRAY) {
PdfArray filters = (PdfArray) filter;
for (int k = 0; k < filters.size(); k++) {
if (filters.get(k).isFlushed()) {
IndirectFilterUtils.throwFlushedFilterException(stream);
}
if (!filters.isEmpty() && PdfName.Crypt.equals(filters.get(k, true))) {
skip = true;
break;
}
}
}
filter.release();
}
if (!skip) {
decrypt.setHashKeyForNextObject(stream.getIndirectReference().getObjNumber(), stream.getIndirectReference().getGenNumber());
bytes = decrypt.decryptByteArray(bytes);
}
}
} finally {
try {
file.close();
} catch (Exception ignored) {
}
}
return bytes;
}
/**
* Reads, decrypts and optionally decodes stream bytes into {@link ByteArrayInputStream}.
* User is responsible for closing returned stream.
*
* @param stream a {@link PdfStream} stream instance to be read
* @param decode true if to get decoded stream, false if to leave it originally encoded.
* @return InputStream or {@code null} if reading was failed.
* @throws IOException on error.
*/
public InputStream readStream(PdfStream stream, boolean decode) throws IOException {
byte[] bytes = readStreamBytes(stream, decode);
return bytes != null ? new ByteArrayInputStream(bytes) : null;
}
/**
* Decode bytes applying the filters specified in the provided dictionary using default filter handlers.
*
* @param b the bytes to decode
* @param streamDictionary the dictionary that contains filter information
* @return the decoded bytes
* @throws PdfException if there are any problems decoding the bytes
*/
public static byte[] decodeBytes(byte[] b, PdfDictionary streamDictionary) {
return decodeBytes(b, streamDictionary, FilterHandlers.getDefaultFilterHandlers());
}
/**
* Decode a byte[] applying the filters specified in the provided dictionary using the provided filter handlers.
*
* @param b the bytes to decode
* @param streamDictionary the dictionary that contains filter information
* @param filterHandlers the map used to look up a handler for each type of filter
* @return the decoded bytes
* @throws PdfException if there are any problems decoding the bytes
*/
public static byte[] decodeBytes(byte[] b, PdfDictionary streamDictionary, Map filterHandlers) {
if (b == null) {
return null;
}
PdfObject filter = streamDictionary.get(PdfName.Filter);
PdfArray filters = new PdfArray();
if (filter != null) {
if (filter.getType() == PdfObject.NAME) {
filters.add(filter);
} else if (filter.getType() == PdfObject.ARRAY) {
filters = ((PdfArray) filter);
}
}
MemoryLimitsAwareHandler memoryLimitsAwareHandler = null;
if (null != streamDictionary.getIndirectReference()) {
memoryLimitsAwareHandler = streamDictionary.getIndirectReference().getDocument().memoryLimitsAwareHandler;
}
final boolean memoryLimitsAwarenessRequired = null != memoryLimitsAwareHandler &&
memoryLimitsAwareHandler.isMemoryLimitsAwarenessRequiredOnDecompression(filters);
if(memoryLimitsAwarenessRequired) {
memoryLimitsAwareHandler.beginDecompressedPdfStreamProcessing();
}
PdfArray dp = new PdfArray();
PdfObject dpo = streamDictionary.get(PdfName.DecodeParms);
if (dpo == null || (dpo.getType() != PdfObject.DICTIONARY && dpo.getType() != PdfObject.ARRAY)) {
if (dpo != null) dpo.release();
dpo = streamDictionary.get(PdfName.DP);
}
if (dpo != null) {
if (dpo.getType() == PdfObject.DICTIONARY) {
dp.add(dpo);
} else if (dpo.getType() == PdfObject.ARRAY) {
dp = ((PdfArray) dpo);
}
dpo.release();
}
for (int j = 0; j < filters.size(); ++j) {
PdfName filterName = (PdfName) filters.get(j);
IFilterHandler filterHandler = filterHandlers.get(filterName);
if (filterHandler == null)
throw new PdfException(KernelExceptionMessageConstant.THIS_FILTER_IS_NOT_SUPPORTED)
.setMessageParams(filterName);
PdfDictionary decodeParams;
if (j < dp.size()) {
PdfObject dpEntry = dp.get(j, true);
if (dpEntry == null || dpEntry.getType() == PdfObject.NULL) {
decodeParams = null;
} else if (dpEntry.getType() == PdfObject.DICTIONARY) {
decodeParams = (PdfDictionary) dpEntry;
} else {
throw new PdfException(KernelExceptionMessageConstant.THIS_DECODE_PARAMETER_TYPE_IS_NOT_SUPPORTED)
.setMessageParams(dpEntry.getClass().toString());
}
} else {
decodeParams = null;
}
b = filterHandler.decode(b, filterName, decodeParams, streamDictionary);
if (memoryLimitsAwarenessRequired) {
memoryLimitsAwareHandler.considerBytesOccupiedByDecompressedPdfStream(b.length);
}
}
if (memoryLimitsAwarenessRequired) {
memoryLimitsAwareHandler.endDecompressedPdfStreamProcessing();
}
return b;
}
/**
* Gets a new file instance of the original PDF
* document.
*
* @return a new file instance of the original PDF document
*/
public RandomAccessFileOrArray getSafeFile() {
return tokens.getSafeFile();
}
/**
* Provides the size of the opened file.
*
* @return The size of the opened file.
*/
public long getFileLength() {
return tokens.getSafeFile().length();
}
/**
* Checks if the document was opened with the owner password so that the end application
* can decide what level of access restrictions to apply. If the document is not encrypted
* it will return {@code true}.
*
* @return {@code true} if the document was opened with the owner password or if it's not encrypted,
* {@code false} if the document was opened with the user password.
* @throws PdfException if the method has been invoked before the PDF document was read.
*/
public boolean isOpenedWithFullPermission() {
if (pdfDocument == null || !pdfDocument.getXref().isReadingCompleted()) {
throw new PdfException(KernelExceptionMessageConstant.DOCUMENT_HAS_NOT_BEEN_READ_YET);
}
return !encrypted || decrypt.isOpenedWithFullPermission() || unethicalReading;
}
/**
* Gets the encryption permissions. It can be used directly in
* {@link WriterProperties#setStandardEncryption(byte[], byte[], int, int)}.
* See ISO 32000-1, Table 22 for more details.
*
* @return the encryption permissions, an unsigned 32-bit quantity.
* @throws PdfException if the method has been invoked before the PDF document was read.
*/
public long getPermissions() {
/* !pdfDocument.getXref().isReadingCompleted() can be used for encryption properties as well,
* because decrypt object is initialized in private readDecryptObj method which is called in our code
* in the next line after the setting isReadingCompleted line. This means that there's no way for users
* when this method would work incorrectly right now.
*/
if (pdfDocument == null || !pdfDocument.getXref().isReadingCompleted()) {
throw new PdfException(KernelExceptionMessageConstant.DOCUMENT_HAS_NOT_BEEN_READ_YET);
}
long perm = 0;
if (encrypted && decrypt.getPermissions() != null) {
perm = (long) decrypt.getPermissions();
}
return perm;
}
/**
* Gets encryption algorithm and access permissions.
*
* @return {@code int} value corresponding to a certain type of encryption.
* @see EncryptionConstants
* @throws PdfException if the method has been invoked before the PDF document was read.
*/
public int getCryptoMode() {
if (pdfDocument == null || !pdfDocument.getXref().isReadingCompleted()) {
throw new PdfException(KernelExceptionMessageConstant.DOCUMENT_HAS_NOT_BEEN_READ_YET);
}
if (decrypt == null)
return -1;
else
return decrypt.getCryptoMode();
}
/**
* Gets the declared PDF/A conformance level of the source document that is being read.
* Note that this information is provided via XMP metadata and is not verified by iText.
* {@link PdfReader#pdfAConformanceLevel} is lazy initialized.
* It will be initialized during the first call of this method.
*
* @return conformance level of the source document, or {@code null} if no PDF/A
* conformance level information is specified.
*/
public PdfAConformanceLevel getPdfAConformanceLevel() {
if (pdfAConformanceLevel == null) {
if (pdfDocument != null && pdfDocument.getXmpMetadata() != null) {
try {
pdfAConformanceLevel = PdfAConformanceLevel.getConformanceLevel(
XMPMetaFactory.parseFromBuffer(pdfDocument.getXmpMetadata()));
} catch (XMPException ignored) {
}
}
}
return pdfAConformanceLevel;
}
/**
* Computes user password if standard encryption handler is used with Standard40, Standard128 or AES128 encryption algorithm.
*
* @return user password, or null if not a standard encryption handler was used or if ownerPasswordUsed wasn't use to open the document.
* @throws PdfException if the method has been invoked before the PDF document was read.
*/
public byte[] computeUserPassword() {
if (pdfDocument == null || !pdfDocument.getXref().isReadingCompleted()) {
throw new PdfException(KernelExceptionMessageConstant.DOCUMENT_HAS_NOT_BEEN_READ_YET);
}
if (!encrypted || !decrypt.isOpenedWithFullPermission()) {
return null;
}
return decrypt.computeUserPassword(properties.password);
}
/**
* Gets original file ID, the first element in {@link PdfName#ID} key of trailer.
* If the size of ID array does not equal 2, an empty array will be returned.
*
* The returned value reflects the value that was written in opened document. If document is modified,
* the ultimate document id can be retrieved from {@link PdfDocument#getOriginalDocumentId()}.
*
* @return byte array represents original file ID.
* @see PdfDocument#getOriginalDocumentId()
* @throws PdfException if the method has been invoked before the PDF document was read.
*/
public byte[] getOriginalFileId() {
if (pdfDocument == null || !pdfDocument.getXref().isReadingCompleted()) {
throw new PdfException(KernelExceptionMessageConstant.DOCUMENT_HAS_NOT_BEEN_READ_YET);
}
PdfArray id = trailer.getAsArray(PdfName.ID);
if (id != null && id.size() == 2) {
return ByteUtils.getIsoBytes(id.getAsString(0).getValue());
} else {
return new byte[0];
}
}
/**
* Gets modified file ID, the second element in {@link PdfName#ID} key of trailer.
* If the size of ID array does not equal 2, an empty array will be returned.
*
* The returned value reflects the value that was written in opened document. If document is modified,
* the ultimate document id can be retrieved from {@link PdfDocument#getModifiedDocumentId()}.
*
* @return byte array represents modified file ID.
* @see PdfDocument#getModifiedDocumentId()
* @throws PdfException if the method has been invoked before the PDF document was read.
*/
public byte[] getModifiedFileId() {
if (pdfDocument == null || !pdfDocument.getXref().isReadingCompleted()) {
throw new PdfException(KernelExceptionMessageConstant.DOCUMENT_HAS_NOT_BEEN_READ_YET);
}
PdfArray id = trailer.getAsArray(PdfName.ID);
if (id != null && id.size() == 2) {
return ByteUtils.getIsoBytes(id.getAsString(1).getValue());
} else {
return new byte[0];
}
}
/**
* Checks if the {@link PdfDocument} read with this {@link PdfReader} is encrypted.
*
* @return {@code true} is the document is encrypted, otherwise {@code false}.
* @throws PdfException if the method has been invoked before the PDF document was read.
*/
public boolean isEncrypted() {
if (pdfDocument == null || !pdfDocument.getXref().isReadingCompleted()) {
throw new PdfException(KernelExceptionMessageConstant.DOCUMENT_HAS_NOT_BEEN_READ_YET);
}
return encrypted;
}
/**
* Parses the entire PDF
*
* @throws IOException if an I/O error occurs.
*/
protected void readPdf() throws IOException {
String version = tokens.checkPdfHeader();
try {
this.headerPdfVersion = PdfVersion.fromString(version);
} catch (IllegalArgumentException exc) {
throw new PdfException(KernelExceptionMessageConstant.PDF_VERSION_IS_NOT_VALID, version);
}
try {
readXref();
} catch (XrefCycledReferencesException | MemoryLimitsAwareException | InvalidXRefPrevException ex) {
// Throws an exception when xref stream has cycled references(due to lack of opportunity to fix such an
// issue) or xref tables have cycled references and PdfReader.StrictnessLevel set to CONSERVATIVE.
// Also throw an exception when xref structure size exceeds jvm memory limit.
throw ex;
} catch (RuntimeException ex) {
if (StrictnessLevel.CONSERVATIVE.isStricter(this.getStrictnessLevel())) {
Logger logger = LoggerFactory.getLogger(PdfReader.class);
logger.error(IoLogMessageConstant.XREF_ERROR_WHILE_READING_TABLE_WILL_BE_REBUILT, ex);
rebuildXref();
} else {
throw ex;
}
}
pdfDocument.getXref().markReadingCompleted();
readDecryptObj();
}
protected void readObjectStream(PdfStream objectStream) throws IOException {
int objectStreamNumber = objectStream.getIndirectReference().getObjNumber();
int first = objectStream.getAsNumber(PdfName.First).intValue();
int n = objectStream.getAsNumber(PdfName.N).intValue();
byte[] bytes = readStreamBytes(objectStream, true);
PdfTokenizer saveTokens = tokens;
try {
tokens = new PdfTokenizer(new RandomAccessFileOrArray(new RandomAccessSourceFactory().createSource(bytes)));
int[] address = new int[n];
int[] objNumber = new int[n];
boolean ok = true;
for (int k = 0; k < n; ++k) {
ok = tokens.nextToken();
if (!ok)
break;
if (tokens.getTokenType() != PdfTokenizer.TokenType.Number) {
ok = false;
break;
}
objNumber[k] = tokens.getIntValue();
ok = tokens.nextToken();
if (!ok)
break;
if (tokens.getTokenType() != PdfTokenizer.TokenType.Number) {
ok = false;
break;
}
address[k] = tokens.getIntValue() + first;
}
if (!ok)
throw new PdfException(KernelExceptionMessageConstant.ERROR_WHILE_READING_OBJECT_STREAM);
for (int k = 0; k < n; ++k) {
tokens.seek(address[k]);
tokens.nextToken();
PdfObject obj;
PdfIndirectReference reference = pdfDocument.getXref().get(objNumber[k]);
if (reference.refersTo != null || reference.getObjStreamNumber() != objectStreamNumber) {
// We skip reading of objects stream's element k if either it is already available in xref
// or if corresponding indirect object reference points to a different object stream.
// The first check prevents from re-initializing objects which are already read. One of the cases
// when this can happen is that some other object from this objects stream was released and requested
// to be re-read.
// Second check ensures that object has no incremental updates and is not freed in append mode.
continue;
}
if (tokens.getTokenType() == PdfTokenizer.TokenType.Number) {
// This ensure that we don't even try to read as indirect reference token (two numbers and "R")
// which are forbidden in object streams.
obj = new PdfNumber(tokens.getByteContent());
} else {
tokens.seek(address[k]);
obj = readObject(false, true);
}
reference.setRefersTo(obj);
obj.setIndirectReference(reference);
}
objectStream.getIndirectReference().setState(PdfObject.ORIGINAL_OBJECT_STREAM);
} finally {
tokens = saveTokens;
}
}
protected PdfObject readObject(PdfIndirectReference reference) {
return readObject(reference, true);
}
protected PdfObject readObject(boolean readAsDirect) throws IOException {
return readObject(readAsDirect, false);
}
protected PdfObject readReference(boolean readAsDirect) {
int num = tokens.getObjNr();
if (num < 0) {
return createPdfNullInstance(readAsDirect);
}
PdfXrefTable table = pdfDocument.getXref();
PdfIndirectReference reference = table.get(num);
if (reference != null) {
if (reference.isFree()) {
Logger logger = LoggerFactory.getLogger(PdfReader.class);
logger.warn(MessageFormatUtil.format(IoLogMessageConstant.INVALID_INDIRECT_REFERENCE, tokens.getObjNr(),
tokens.getGenNr()));
return createPdfNullInstance(readAsDirect);
}
if (reference.getGenNumber() != tokens.getGenNr()) {
if (fixedXref) {
Logger logger = LoggerFactory.getLogger(PdfReader.class);
logger.warn(
MessageFormatUtil.format(IoLogMessageConstant.INVALID_INDIRECT_REFERENCE, tokens.getObjNr(),
tokens.getGenNr()));
return createPdfNullInstance(readAsDirect);
} else {
throw new PdfException(KernelExceptionMessageConstant.INVALID_INDIRECT_REFERENCE,
MessageFormatUtil.format("{0} {1} R", reference.getObjNumber(), reference.getGenNumber()));
}
}
} else {
if (table.isReadingCompleted()) {
Logger logger = LoggerFactory.getLogger(PdfReader.class);
logger.warn(MessageFormatUtil.format(IoLogMessageConstant.INVALID_INDIRECT_REFERENCE, tokens.getObjNr(),
tokens.getGenNr()));
return createPdfNullInstance(readAsDirect);
} else {
reference = table.add((PdfIndirectReference) new PdfIndirectReference(pdfDocument,
num, tokens.getGenNr(), 0).setState(PdfObject.READING));
}
}
return reference;
}
protected PdfObject readObject(boolean readAsDirect, boolean objStm) throws IOException {
tokens.nextValidToken();
PdfTokenizer.TokenType type = tokens.getTokenType();
switch (type) {
case StartDic: {
PdfDictionary dict = readDictionary(objStm);
long pos = tokens.getPosition();
// be careful in the trailer. May not be a "next" token.
boolean hasNext;
do {
hasNext = tokens.nextToken();
} while (hasNext && tokens.getTokenType() == PdfTokenizer.TokenType.Comment);
if (hasNext && tokens.tokenValueEqualsTo(PdfTokenizer.Stream)) {
//skip whitespaces
int ch;
do {
ch = tokens.read();
} while (ch == 32 || ch == 9 || ch == 0 || ch == 12);
if (ch != '\n') {
ch = tokens.read();
}
if (ch != '\n') {
tokens.backOnePosition(ch);
}
PdfStream pdfStream = new PdfStream(tokens.getPosition(), dict);
tokens.seek(pdfStream.getOffset() + pdfStream.getLength());
return pdfStream;
} else {
tokens.seek(pos);
return dict;
}
}
case StartArray:
return readArray(objStm);
case Number:
return new PdfNumber(tokens.getByteContent());
case String: {
PdfString pdfString = new PdfString(tokens.getByteContent(), tokens.isHexString());
if (encrypted && !decrypt.isEmbeddedFilesOnly() && !objStm) {
pdfString.setDecryption(currentIndirectReference.getObjNumber(), currentIndirectReference.getGenNumber(), decrypt);
}
return pdfString;
}
case Name:
return readPdfName(readAsDirect);
case Ref:
return readReference(readAsDirect);
case EndOfFile:
throw new PdfException(KernelExceptionMessageConstant.UNEXPECTED_END_OF_FILE);
default:
if (tokens.tokenValueEqualsTo(PdfTokenizer.Null)) {
return createPdfNullInstance(readAsDirect);
} else if (tokens.tokenValueEqualsTo(PdfTokenizer.True)) {
if (readAsDirect) {
return PdfBoolean.TRUE;
} else {
return new PdfBoolean(true);
}
} else if (tokens.tokenValueEqualsTo(PdfTokenizer.False)) {
if (readAsDirect) {
return PdfBoolean.FALSE;
} else {
return new PdfBoolean(false);
}
}
return null;
}
}
protected PdfName readPdfName(boolean readAsDirect) {
if (readAsDirect) {
PdfName cachedName = PdfName.staticNames.get(tokens.getStringValue());
if (cachedName != null)
return cachedName;
}
// an indirect name (how odd...), or a non-standard one
return new PdfName(tokens.getByteContent());
}
protected PdfDictionary readDictionary(boolean objStm) throws IOException {
PdfDictionary dic = new PdfDictionary();
while (true) {
tokens.nextValidToken();
if (tokens.getTokenType() == PdfTokenizer.TokenType.EndDic) {
break;
}
if (tokens.getTokenType() != PdfTokenizer.TokenType.Name) {
tokens.throwError(
KernelExceptionMessageConstant.THIS_DICTIONARY_KEY_IS_NOT_A_NAME, tokens.getStringValue());
}
PdfName name = readPdfName(true);
PdfObject obj = readObject(true, objStm);
if (obj == null) {
if (tokens.getTokenType() == PdfTokenizer.TokenType.EndDic)
tokens.throwError(MessageFormatUtil.
format(KernelExceptionMessageConstant.UNEXPECTED_TOKEN, ">>"));
if (tokens.getTokenType() == PdfTokenizer.TokenType.EndArray)
tokens.throwError(MessageFormatUtil.
format(KernelExceptionMessageConstant.UNEXPECTED_TOKEN, "]"));
}
dic.put(name, obj);
}
return dic;
}
protected PdfArray readArray(boolean objStm) throws IOException {
PdfArray array = new PdfArray();
while (true) {
PdfObject obj = readObject(true, objStm);
if (obj == null) {
if (tokens.getTokenType() != PdfTokenizer.TokenType.EndArray) {
processArrayReadError();
}
break;
}
array.add(obj);
}
return array;
}
protected void readXref() throws IOException {
tokens.seek(tokens.getStartxref());
tokens.nextToken();
if (!tokens.tokenValueEqualsTo(PdfTokenizer.Startxref)) {
throw new PdfException(KernelExceptionMessageConstant.PDF_STARTXREF_NOT_FOUND, tokens);
}
tokens.nextToken();
if (tokens.getTokenType() != PdfTokenizer.TokenType.Number) {
throw new PdfException(KernelExceptionMessageConstant.PDF_STARTXREF_IS_NOT_FOLLOWED_BY_A_NUMBER, tokens);
}
long startxref = tokens.getLongValue();
lastXref = startxref;
eofPos = tokens.getPosition();
try {
if (readXrefStream(startxref)) {
xrefStm = true;
return;
}
} catch (XrefCycledReferencesException
| MemoryLimitsAwareException
| InvalidXRefPrevException exceptionWhileReadingXrefStream) {
throw exceptionWhileReadingXrefStream;
} catch (Exception ignored) {
// Do nothing.
}
// clear xref because of possible issues at reading xref stream.
pdfDocument.getXref().clear();
tokens.seek(startxref);
trailer = readXrefSection();
// Prev key - integer value.
// (Present only if the file has more than one cross-reference section; shall be an indirect reference).
// The byte offset in the decoded stream from the beginning of the file
// to the beginning of the previous cross-reference section.
PdfDictionary trailer2 = trailer;
final Set alreadyVisitedXrefTables = new HashSet<>();
while (true) {
alreadyVisitedXrefTables.add(startxref);
PdfNumber prev = getXrefPrev(trailer2.get(PdfName.Prev, false));
if (prev == null) {
break;
}
long prevXrefOffset = prev.longValue();
if (alreadyVisitedXrefTables.contains(prevXrefOffset)) {
if (StrictnessLevel.CONSERVATIVE.isStricter(this.getStrictnessLevel())) {
// Throw the exception to rebuild xref table, it'll be caught in method above.
throw new PdfException(KernelExceptionMessageConstant.
TRAILER_PREV_ENTRY_POINTS_TO_ITS_OWN_CROSS_REFERENCE_SECTION);
} else {
throw new XrefCycledReferencesException(
KernelExceptionMessageConstant.XREF_TABLE_HAS_CYCLED_REFERENCES);
}
}
startxref = prevXrefOffset;
tokens.seek(startxref);
trailer2 = readXrefSection();
}
Integer xrefSize = trailer.getAsInt(PdfName.Size);
if (xrefSize == null) {
throw new PdfException(KernelExceptionMessageConstant.INVALID_XREF_TABLE);
}
}
protected PdfDictionary readXrefSection() throws IOException {
tokens.nextValidToken();
if (!tokens.tokenValueEqualsTo(PdfTokenizer.Xref))
tokens.throwError(KernelExceptionMessageConstant.XREF_SUBSECTION_NOT_FOUND);
PdfXrefTable xref = pdfDocument.getXref();
while (true) {
tokens.nextValidToken();
if (tokens.tokenValueEqualsTo(PdfTokenizer.Trailer)) {
break;
}
if (tokens.getTokenType() != PdfTokenizer.TokenType.Number) {
tokens.throwError(
KernelExceptionMessageConstant.OBJECT_NUMBER_OF_THE_FIRST_OBJECT_IN_THIS_XREF_SUBSECTION_NOT_FOUND);
}
int start = tokens.getIntValue();
tokens.nextValidToken();
if (tokens.getTokenType() != PdfTokenizer.TokenType.Number) {
tokens.throwError(KernelExceptionMessageConstant.NUMBER_OF_ENTRIES_IN_THIS_XREF_SUBSECTION_NOT_FOUND);
}
int end = tokens.getIntValue() + start;
for (int num = start; num < end; num++) {
tokens.nextValidToken();
long pos = tokens.getLongValue();
tokens.nextValidToken();
int gen = tokens.getIntValue();
tokens.nextValidToken();
if (pos == 0L && gen == 65535 && num == 1 && start != 0) {
// Very rarely can an XREF have an incorrect start number. (SUP-1557)
// e.g.
// xref
// 1 13
// 0000000000 65535 f
// 0000000009 00000 n
// 0000215136 00000 n
// [...]
// Because of how iText reads (and initializes) the XREF, this will lead to the XREF having two 0000 65535 entries.
// This throws off the parsing and other operations you'd like to perform.
// To fix this we reset our index and decrease the limit when we've encountered the magic entry at position 1.
num = 0;
end--;
continue;
}
PdfIndirectReference reference = xref.get(num);
boolean refReadingState = reference != null && reference.checkState(PdfObject.READING) && reference.getGenNumber() == gen;
// for references that are added by xref table itself (like 0 entry)
boolean refFirstEncountered = reference == null
|| !refReadingState && reference.getDocument() == null;
if (refFirstEncountered) {
reference = new PdfIndirectReference(pdfDocument, num, gen, pos);
} else if (refReadingState) {
reference.setOffset(pos);
reference.clearState(PdfObject.READING);
} else {
continue;
}
if (tokens.tokenValueEqualsTo(PdfTokenizer.N)) {
if (pos == 0) {
tokens.throwError(
KernelExceptionMessageConstant.FILE_POSITION_0_CROSS_REFERENCE_ENTRY_IN_THIS_XREF_SUBSECTION);
}
} else if (tokens.tokenValueEqualsTo(PdfTokenizer.F)) {
if (refFirstEncountered) {
reference.setState(PdfObject.FREE);
}
} else {
tokens.throwError(
KernelExceptionMessageConstant.INVALID_CROSS_REFERENCE_ENTRY_IN_THIS_XREF_SUBSECTION);
}
if (refFirstEncountered) {
xref.add(reference);
}
}
}
PdfDictionary trailer = (PdfDictionary) readObject(false);
PdfObject xrs = trailer.get(PdfName.XRefStm);
if (xrs != null && xrs.getType() == PdfObject.NUMBER) {
int loc = ((PdfNumber) xrs).intValue();
try {
readXrefStream(loc);
xrefStm = true;
hybridXref = true;
} catch (IOException e) {
xref.clear();
throw e;
}
}
return trailer;
}
protected boolean readXrefStream(long ptr) throws IOException {
final Set alreadyVisitedXrefStreams = new HashSet<>();
while (ptr != -1) {
tokens.seek(ptr);
if (!tokens.nextToken()) {
return false;
}
if (tokens.getTokenType() != PdfTokenizer.TokenType.Number) {
return false;
}
if (!tokens.nextToken() || tokens.getTokenType() != PdfTokenizer.TokenType.Number) {
return false;
}
if (!tokens.nextToken() || !tokens.tokenValueEqualsTo(PdfTokenizer.Obj)) {
return false;
}
alreadyVisitedXrefStreams.add(ptr);
PdfXrefTable xref = pdfDocument.getXref();
PdfObject object = readObject(false);
PdfStream xrefStream;
if (object.getType() == PdfObject.STREAM) {
xrefStream = (PdfStream) object;
if (!PdfName.XRef.equals(xrefStream.get(PdfName.Type))) {
return false;
}
} else {
return false;
}
if (trailer == null) {
trailer = new PdfDictionary();
trailer.putAll(xrefStream);
trailer.remove(PdfName.DecodeParms);
trailer.remove(PdfName.Filter);
trailer.remove(PdfName.Prev);
trailer.remove(PdfName.Length);
}
int size = ((PdfNumber) xrefStream.get(PdfName.Size)).intValue();
PdfArray index;
PdfObject obj = xrefStream.get(PdfName.Index);
if (obj == null) {
index = new PdfArray();
index.add(new PdfNumber(0));
index.add(new PdfNumber(size));
} else {
index = (PdfArray) obj;
}
PdfArray w = xrefStream.getAsArray(PdfName.W);
long prev = -1;
obj = getXrefPrev(xrefStream.get(PdfName.Prev, false));
if (obj != null)
prev = ((PdfNumber) obj).longValue();
xref.setCapacity(size);
byte[] b = readStreamBytes(xrefStream, true);
int bptr = 0;
int[] wc = new int[3];
for (int k = 0; k < 3; ++k) {
wc[k] = w.getAsNumber(k).intValue();
}
for (int idx = 0; idx < index.size(); idx += 2) {
int start = index.getAsNumber(idx).intValue();
int length = index.getAsNumber(idx + 1).intValue();
xref.setCapacity(start + length);
while (length-- > 0) {
int type = 1;
if (wc[0] > 0) {
type = 0;
for (int k = 0; k < wc[0]; ++k) {
type = (type << 8) + (b[bptr++] & 0xff);
}
}
long field2 = 0;
for (int k = 0; k < wc[1]; ++k) {
field2 = (field2 << 8) + (b[bptr++] & 0xff);
}
int field3 = 0;
for (int k = 0; k < wc[2]; ++k) {
field3 = (field3 << 8) + (b[bptr++] & 0xff);
}
int base = start;
PdfIndirectReference newReference;
switch (type) {
case 0:
newReference = (PdfIndirectReference) new PdfIndirectReference(pdfDocument, base, field3, field2).setState(PdfObject.FREE);
break;
case 1:
newReference = new PdfIndirectReference(pdfDocument, base, field3, field2);
break;
case 2:
newReference = new PdfIndirectReference(pdfDocument, base, 0, field3);
newReference.setObjStreamNumber((int) field2);
break;
default:
throw new PdfException(KernelExceptionMessageConstant.INVALID_XREF_STREAM);
}
PdfIndirectReference reference = xref.get(base);
boolean refReadingState = reference != null && reference.checkState(PdfObject.READING) && reference.getGenNumber() == newReference.getGenNumber();
// for references that are added by xref table itself (like 0 entry)
boolean refFirstEncountered = reference == null
|| !refReadingState && reference.getDocument() == null;
if (refFirstEncountered) {
xref.add(newReference);
} else if (refReadingState) {
reference.setOffset(newReference.getOffset());
reference.setObjStreamNumber(newReference.getObjStreamNumber());
reference.clearState(PdfObject.READING);
}
++start;
}
}
ptr = prev;
if (alreadyVisitedXrefStreams.contains(ptr)) {
throw new XrefCycledReferencesException(
KernelExceptionMessageConstant.XREF_STREAM_HAS_CYCLED_REFERENCES);
}
}
return true;
}
protected void fixXref() throws IOException {
fixedXref = true;
PdfXrefTable xref = pdfDocument.getXref();
tokens.seek(0);
ByteBuffer buffer = new ByteBuffer(24);
PdfTokenizer lineTokeniser = new PdfTokenizer(new RandomAccessFileOrArray(new ReusableRandomAccessSource(buffer)));
for (; ; ) {
long pos = tokens.getPosition();
buffer.reset();
// added boolean because of mailing list issue (17 Feb. 2014)
if (!tokens.readLineSegment(buffer, true))
break;
if (buffer.get(0) >= '0' && buffer.get(0) <= '9') {
int[] obj = PdfTokenizer.checkObjectStart(lineTokeniser);
if (obj == null)
continue;
int num = obj[0];
int gen = obj[1];
PdfIndirectReference reference = xref.get(num);
if (reference != null && reference.getGenNumber() == gen) {
reference.fixOffset(pos);
}
}
}
}
protected void rebuildXref() throws IOException {
xrefStm = false;
hybridXref = false;
rebuiltXref = true;
PdfXrefTable xref = pdfDocument.getXref();
xref.clear();
tokens.seek(0);
trailer = null;
ByteBuffer buffer = new ByteBuffer(24);
PdfTokenizer lineTokenizer =
new PdfTokenizer(new RandomAccessFileOrArray(new ReusableRandomAccessSource(buffer)));
for (; ; ) {
long pos = tokens.getPosition();
buffer.reset();
// added boolean because of mailing list issue (17 Feb. 2014)
if (!tokens.readLineSegment(buffer, true))
break;
if (buffer.get(0) == 't') {
if (!PdfTokenizer.checkTrailer(buffer))
continue;
tokens.seek(pos);
tokens.nextToken();
pos = tokens.getPosition();
try {
PdfDictionary dic = (PdfDictionary) readObject(false);
if (dic.get(PdfName.Root, false) != null)
trailer = dic;
else
tokens.seek(pos);
} catch (Exception e) {
tokens.seek(pos);
}
} else if (buffer.get(0) >= '0' && buffer.get(0) <= '9') {
int[] obj = PdfTokenizer.checkObjectStart(lineTokenizer);
if (obj == null)
continue;
int num = obj[0];
int gen = obj[1];
if (xref.get(num) == null || xref.get(num).getGenNumber() <= gen) {
xref.add(new PdfIndirectReference(pdfDocument, num, gen, pos));
}
}
}
if (trailer == null) {
throw new PdfException(KernelExceptionMessageConstant.TRAILER_NOT_FOUND);
}
}
protected PdfNumber getXrefPrev(PdfObject prevObjectToCheck) {
if (prevObjectToCheck == null) {
return null;
}
if (prevObjectToCheck.getType() == PdfObject.NUMBER) {
return (PdfNumber) prevObjectToCheck;
} else {
if (prevObjectToCheck.getType() == PdfObject.INDIRECT_REFERENCE &&
StrictnessLevel.CONSERVATIVE.isStricter(this.getStrictnessLevel())) {
final PdfObject value = ((PdfIndirectReference) prevObjectToCheck).getRefersTo(true);
if (value != null && value.getType() == PdfObject.NUMBER) {
return (PdfNumber) value;
}
}
throw new InvalidXRefPrevException(
KernelExceptionMessageConstant.XREF_PREV_SHALL_BE_DIRECT_NUMBER_OBJECT);
}
}
boolean isMemorySavingMode() {
return memorySavingMode;
}
private void processArrayReadError() {
final String error = MessageFormatUtil.format(KernelExceptionMessageConstant.UNEXPECTED_TOKEN,
new String(tokens.getByteContent(), StandardCharsets.UTF_8));
if (StrictnessLevel.CONSERVATIVE.isStricter(this.getStrictnessLevel())) {
final Logger logger = LoggerFactory.getLogger(PdfReader.class);
logger.error(error);
} else {
tokens.throwError(error);
}
}
private void readDecryptObj() {
if (encrypted)
return;
PdfDictionary enc = trailer.getAsDictionary(PdfName.Encrypt);
if (enc == null)
return;
encrypted = true;
PdfName filter = enc.getAsName(PdfName.Filter);
if (PdfName.Adobe_PubSec.equals(filter)) {
if (properties.certificate == null) {
throw new PdfException(
KernelExceptionMessageConstant.CERTIFICATE_IS_NOT_PROVIDED_DOCUMENT_IS_ENCRYPTED_WITH_PUBLIC_KEY_CERTIFICATE);
}
decrypt = new PdfEncryption(enc, properties.certificateKey, properties.certificate,
properties.certificateKeyProvider, properties.externalDecryptionProcess);
} else if (PdfName.Standard.equals(filter)) {
decrypt = new PdfEncryption(enc, properties.password, getOriginalFileId());
} else {
throw new UnsupportedSecurityHandlerException(MessageFormatUtil.format(UnsupportedSecurityHandlerException.UnsupportedSecurityHandler, filter));
}
}
private PdfObject readObject(PdfIndirectReference reference, boolean fixXref) {
if (reference == null)
return null;
if (reference.refersTo != null)
return reference.refersTo;
try {
currentIndirectReference = reference;
if (reference.getObjStreamNumber() > 0) {
PdfStream objectStream = (PdfStream) pdfDocument.getXref().
get(reference.getObjStreamNumber()).getRefersTo(false);
readObjectStream(objectStream);
return reference.refersTo;
} else if (reference.getOffset() > 0) {
PdfObject object;
try {
tokens.seek(reference.getOffset());
tokens.nextValidToken();
if (tokens.getTokenType() != PdfTokenizer.TokenType.Obj
|| tokens.getObjNr() != reference.getObjNumber()
|| tokens.getGenNr() != reference.getGenNumber()) {
tokens.throwError(
KernelExceptionMessageConstant.INVALID_OFFSET_FOR_THIS_OBJECT, reference.toString());
}
object = readObject(false);
} catch (RuntimeException ex) {
if (fixXref && reference.getObjStreamNumber() == 0) {
fixXref();
object = readObject(reference, false);
} else {
throw ex;
}
}
return object != null ? object.setIndirectReference(reference) : null;
} else {
return null;
}
} catch (IOException e) {
throw new PdfException(KernelExceptionMessageConstant.CANNOT_READ_PDF_OBJECT, e);
}
}
private void checkPdfStreamLength(PdfStream pdfStream) throws IOException {
if (!correctStreamLength)
return;
long fileLength = tokens.length();
long start = pdfStream.getOffset();
boolean calc = false;
int streamLength = 0;
PdfNumber pdfNumber = pdfStream.getAsNumber(PdfName.Length);
if (pdfNumber != null) {
streamLength = pdfNumber.intValue();
if (streamLength + start > fileLength - 20) {
calc = true;
} else {
tokens.seek(start + streamLength);
String line = tokens.readString(20);
if (!line.startsWith(endstream2) && !line.startsWith(endstream3) &&
!line.startsWith(endstream4) && !line.startsWith(endstream1)) {
calc = true;
}
}
} else {
pdfNumber = new PdfNumber(0);
pdfStream.put(PdfName.Length, pdfNumber);
calc = true;
}
if (calc) {
ByteBuffer line = new ByteBuffer(16);
tokens.seek(start);
long pos;
while (true) {
pos = tokens.getPosition();
line.reset();
// added boolean because of mailing list issue (17 Feb. 2014)
if (!tokens.readLineSegment(line, false)) {
if (!StrictnessLevel.CONSERVATIVE.isStricter(this.strictnessLevel)) {
throw new PdfException(KernelExceptionMessageConstant.STREAM_SHALL_END_WITH_ENDSTREAM);
}
break;
}
if (line.startsWith(endstream)) {
break;
} else if (line.startsWith(endobj)) {
tokens.seek(pos - 16);
String s = tokens.readString(16);
int index = s.indexOf(endstream1);
if (index >= 0)
pos = pos - 16 + index;
break;
}
}
streamLength = (int) (pos - start);
tokens.seek(pos - 2);
if (tokens.read() == 13) {
streamLength--;
}
tokens.seek(pos - 1);
if (tokens.read() == 10) {
streamLength--;
}
pdfNumber.setValue(streamLength);
pdfStream.updateLength(streamLength);
}
}
private PdfObject createPdfNullInstance(boolean readAsDirect) {
if (readAsDirect) {
return PdfNull.PDF_NULL;
} else {
return new PdfNull();
}
}
/**
* Utility method that checks the provided byte source to see if it has junk bytes at the beginning. If junk bytes
* are found, construct a tokeniser that ignores the junk. Otherwise, construct a tokeniser for the byte source as it is
*
* @param byteSource the source to check
* @return a tokeniser that is guaranteed to start at the PDF header
* @throws IOException if there is a problem reading the byte source
*/
private static PdfTokenizer getOffsetTokeniser(IRandomAccessSource byteSource, boolean closeStream)
throws IOException {
PdfTokenizer tok = new PdfTokenizer(new RandomAccessFileOrArray(byteSource));
int offset;
try {
offset = tok.getHeaderOffset();
} catch (com.itextpdf.io.exceptions.IOException ex) {
if (closeStream) {
tok.close();
}
throw ex;
}
if (offset != 0) {
IRandomAccessSource offsetSource = new WindowRandomAccessSource(byteSource, offset);
tok = new PdfTokenizer(new RandomAccessFileOrArray(offsetSource));
}
return tok;
}
protected static class ReusableRandomAccessSource implements IRandomAccessSource {
private ByteBuffer buffer;
public ReusableRandomAccessSource(ByteBuffer buffer) {
if (buffer == null) throw new IllegalArgumentException("Passed byte buffer can not be null.");
this.buffer = buffer;
}
@Override
public int get(long offset) {
if (offset >= buffer.size()) return -1;
return 0xff & buffer.getInternalBuffer()[(int) offset];
}
@Override
public int get(long offset, byte[] bytes, int off, int len) {
if (buffer == null) throw new IllegalStateException("Already closed");
if (offset >= buffer.size())
return -1;
if (offset + len > buffer.size())
len = (int) (buffer.size() - offset);
System.arraycopy(buffer.getInternalBuffer(), (int) offset, bytes, off, len);
return len;
}
@Override
public long length() {
return buffer.size();
}
@Override
public void close() {
buffer = null;
}
}
/**
* Enumeration representing the strictness level for reading.
*/
public enum StrictnessLevel {
/**
* The reading strictness level at which iText fails (throws an exception) in case of
* contradiction with PDF specification, but still recovers from mild parsing errors
* and ambiguities.
*/
CONSERVATIVE(5000),
/**
* The reading strictness level at which iText tries to recover from parsing
* errors if possible.
*/
LENIENT(3000);
private final int levelValue;
StrictnessLevel(int levelValue) {
this.levelValue = levelValue;
}
/**
* Checks whether the current instance represents more strict reading level than
* the provided one. Note that the {@code null} is less strict than any other value.
*
* @param compareWith the {@link StrictnessLevel} to compare with
*
* @return {@code true} if the current level is stricter than the provided one
*/
public boolean isStricter(StrictnessLevel compareWith) {
return compareWith == null || this.levelValue > compareWith.levelValue;
}
}
}