// com.itextpdf.kernel.pdf.PdfReader Maven / Gradle / Ivy
/*
This file is part of the iText (R) project.
Copyright (c) 1998-2024 Apryse Group NV
Authors: Apryse Software.
This program is offered under a commercial and under the AGPL license.
For commercial licensing, contact us at https://itextpdf.com/sales. For AGPL licensing, see below.
AGPL licensing:
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
package com.itextpdf.kernel.pdf;
import com.itextpdf.io.logs.IoLogMessageConstant;
import com.itextpdf.io.source.ByteBuffer;
import com.itextpdf.io.source.ByteUtils;
import com.itextpdf.io.source.IRandomAccessSource;
import com.itextpdf.io.source.PdfTokenizer;
import com.itextpdf.io.source.RASInputStream;
import com.itextpdf.io.source.RandomAccessFileOrArray;
import com.itextpdf.io.source.RandomAccessSourceFactory;
import com.itextpdf.io.source.WindowRandomAccessSource;
import com.itextpdf.commons.utils.MessageFormatUtil;
import com.itextpdf.kernel.exceptions.InvalidXRefPrevException;
import com.itextpdf.kernel.exceptions.MemoryLimitsAwareException;
import com.itextpdf.kernel.exceptions.PdfException;
import com.itextpdf.kernel.crypto.securityhandler.UnsupportedSecurityHandlerException;
import com.itextpdf.kernel.exceptions.KernelExceptionMessageConstant;
import com.itextpdf.kernel.exceptions.XrefCycledReferencesException;
import com.itextpdf.kernel.pdf.filters.FilterHandlers;
import com.itextpdf.kernel.pdf.filters.IFilterHandler;
import com.itextpdf.kernel.xmp.XMPException;
import com.itextpdf.kernel.xmp.XMPMeta;
import com.itextpdf.kernel.xmp.XMPMetaFactory;
import java.io.ByteArrayInputStream;
import java.io.Closeable;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashSet;
import java.nio.charset.StandardCharsets;
import java.util.Map;
import java.util.Set;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Reads a PDF document.
*/
public class PdfReader implements Closeable {
/**
* The default {@link StrictnessLevel} to be used.
*/
public static final StrictnessLevel DEFAULT_STRICTNESS_LEVEL = StrictnessLevel.LENIENT;
// The "endstream" keyword on its own and prefixed with each end-of-line variant (LF, CRLF, CR).
private static final String endstream1 = "endstream";
private static final String endstream2 = "\nendstream";
private static final String endstream3 = "\r\nendstream";
private static final String endstream4 = "\rendstream";
// The "endstream" and "endobj" keywords as bytes produced by ByteUtils.getIsoBytes.
private static final byte[] endstream = ByteUtils.getIsoBytes("endstream");
private static final byte[] endobj = ByteUtils.getIsoBytes("endobj");
// NOTE(review): presumably toggles correction of wrong stream lengths; its usage is outside this section - confirm.
protected static boolean correctStreamLength = true;
// Set via setUnethicalReading(boolean); when true, isOpenedWithFullPermission() reports full permission.
private boolean unethicalReading;
// Set via setMemorySavingMode(boolean).
private boolean memorySavingMode;
// Current strictness level; setStrictnessLevel(null) resets it to DEFAULT_STRICTNESS_LEVEL.
private StrictnessLevel strictnessLevel = DEFAULT_STRICTNESS_LEVEL;
// Indirect reference whose object and generation numbers are handed to PdfString#setDecryption
// for strings read by readObject(boolean, boolean).
private PdfIndirectReference currentIndirectReference;
// Parsed XMP metadata, lazily populated by getPdfAConformanceLevel().
private XMPMeta xmpMeta;
private XrefProcessor xrefProcessor = new XrefProcessor();
// Tokenizer over the document bytes; readObjectStream temporarily swaps it for a tokenizer over stream bytes.
protected PdfTokenizer tokens;
// Decryption handler; may be null (getCryptoMode() returns -1 in that case).
protected PdfEncryption decrypt;
// here we store only the pdfVersion that is written in the document's header,
// however it could differ from the actual pdf version that could be written in document's catalog
protected PdfVersion headerPdfVersion;
// Number read after the "startxref" keyword by readXref().
protected long lastXref;
// Tokenizer position right after the startxref number was read by readXref().
protected long eofPos;
// Trailer dictionary; its /ID entry is read by getOriginalFileId() and getModifiedFileId().
protected PdfDictionary trailer;
// The document being read; while it is null the state getters throw DOCUMENT_HAS_NOT_BEEN_READ_YET.
protected PdfDocument pdfDocument;
// Lazily initialized by getPdfAConformanceLevel().
protected PdfAConformanceLevel pdfAConformanceLevel;
// Properties passed to the constructor.
protected ReaderProperties properties;
// State flags reported by isEncrypted(), hasRebuiltXref(), hasHybridXref(), hasFixedXref() and hasXrefStm().
protected boolean encrypted = false;
protected boolean rebuiltXref = false;
protected boolean hybridXref = false;
protected boolean fixedXref = false;
protected boolean xrefStm = false;
/**
* Constructs a new PdfReader that reads from the given byte source.
* Delegates to the package-private constructor, passing {@code false} for {@code closeStream}.
*
* @param byteSource source of bytes for the reader
* @param properties properties of the created reader
* @throws IOException if an I/O error occurs
*/
public PdfReader(IRandomAccessSource byteSource, ReaderProperties properties) throws IOException {
this(byteSource, properties, false);
}
/**
* Reads and parses a PDF document.
* The byte source is obtained through {@link RandomAccessSourceFactory#extractOrCreateSource(InputStream)}
* and the package-private constructor is invoked with {@code closeStream} set to {@code true}.
*
* @param is the {@code InputStream} containing the document. If the inputStream is an instance of
* {@link RASInputStream} then the {@link IRandomAccessSource} would be extracted. Otherwise the stream
* is read to the end but is not closed.
* @param properties properties of the created reader
*
* @throws IOException on error
*/
public PdfReader(InputStream is, ReaderProperties properties) throws IOException {
this(new RandomAccessSourceFactory().extractOrCreateSource(is), properties, true);
}
/**
 * Reads and parses a PDF document stored in a file, using a fresh {@link ReaderProperties} instance.
 *
 * @param file the {@code File} containing the document.
 * @throws IOException on error
 * @throws FileNotFoundException when the specified File is not found
 */
public PdfReader(File file) throws FileNotFoundException, IOException {
    // Goes through the (File, ReaderProperties) overload, which resolves the absolute path itself.
    this(file, new ReaderProperties());
}
/**
* Reads and parses a PDF document, using a new default-constructed {@link ReaderProperties}.
*
* @param is the {@code InputStream} containing the document. If the inputStream is an instance of
* {@link RASInputStream} then the {@link IRandomAccessSource} would be extracted. Otherwise the stream
* is read to the end but is not closed.
*
* @throws IOException on error
*/
public PdfReader(InputStream is) throws IOException {
this(is, new ReaderProperties());
}
/**
* Reads and parses a PDF document.
* The byte source is created by {@link RandomAccessSourceFactory#createBestSource(String)} with
* force-read switched off, and the package-private constructor is invoked with {@code closeStream}
* set to {@code true}.
*
* @param filename the file name of the document
* @param properties properties of the created reader
* @throws IOException on error
*/
public PdfReader(String filename, ReaderProperties properties) throws IOException {
this(
new RandomAccessSourceFactory()
.setForceRead(false)
.createBestSource(filename),
properties,
true
);
}
/**
* Reads and parses a PDF document, using a new default-constructed {@link ReaderProperties}.
*
* @param filename the file name of the document
* @throws IOException on error
*/
public PdfReader(String filename) throws IOException {
this(filename, new ReaderProperties());
}
/**
* Reads and parses a PDF document.
* The file is addressed by its absolute path, which is passed on to the file-name based constructor.
*
* @param file the file of the document
* @param properties properties of the created reader
* @throws IOException on error
*/
public PdfReader(File file, ReaderProperties properties) throws IOException {
this(file.getAbsolutePath(), properties);
}
/**
* Package-private constructor that all public constructors delegate to.
* Stores the reader properties and creates the tokenizer via {@code getOffsetTokeniser}.
*
* @param byteSource source of bytes for the reader
* @param properties properties of the created reader
* @param closeStream forwarded to {@code getOffsetTokeniser}; presumably the initial value
* reported by {@link #isCloseStream()} (TODO confirm against that helper)
* @throws IOException if an I/O error occurs
*/
PdfReader(IRandomAccessSource byteSource, ReaderProperties properties, boolean closeStream) throws IOException {
this.properties = properties;
this.tokens = getOffsetTokeniser(byteSource, closeStream);
}
/**
 * Closes the underlying {@link PdfTokenizer}.
 *
 * @throws IOException on error.
 */
@Override
public void close() throws IOException {
    tokens.close();
}
/**
* Enables or disables unethical reading.
* The iText is not responsible if you decide to change the
* value of this parameter.
* When enabled, {@link #isOpenedWithFullPermission()} returns {@code true} for an already read document.
*
* @param unethicalReading true to enable unethicalReading, false to disable it.
* By default unethicalReading is disabled.
* @return this {@link PdfReader} instance.
*/
public PdfReader setUnethicalReading(boolean unethicalReading) {
this.unethicalReading = unethicalReading;
return this;
}
/**
* Defines if memory saving mode is enabled.
* <p>
* By default memory saving mode is disabled for the sake of time–memory trade-off.
* <p>
* If memory saving mode is enabled, document processing might slow down, but reading will be less memory demanding.
*
* @param memorySavingMode true to enable memory saving mode, false to disable it.
* @return this {@link PdfReader} instance.
*/
public PdfReader setMemorySavingMode(boolean memorySavingMode) {
this.memorySavingMode = memorySavingMode;
return this;
}
/**
* Get the current {@link StrictnessLevel} of the reader.
* The level is {@link PdfReader#DEFAULT_STRICTNESS_LEVEL} unless changed through
* {@link #setStrictnessLevel(StrictnessLevel)}.
*
* @return the current {@link StrictnessLevel}
*/
public StrictnessLevel getStrictnessLevel() {
return strictnessLevel;
}
/**
 * Changes the {@link StrictnessLevel} of this reader. Passing {@code null} selects
 * {@link PdfReader#DEFAULT_STRICTNESS_LEVEL}.
 *
 * @param strictnessLevel the {@link StrictnessLevel} to set
 *
 * @return this {@link PdfReader} instance
 */
public PdfReader setStrictnessLevel(StrictnessLevel strictnessLevel) {
    if (strictnessLevel == null) {
        this.strictnessLevel = DEFAULT_STRICTNESS_LEVEL;
    } else {
        this.strictnessLevel = strictnessLevel;
    }
    return this;
}
/**
* Gets whether {@link #close()} method shall close input stream.
* The value is read from the underlying {@link PdfTokenizer}.
*
* @return true, if {@link #close()} method will close input stream,
* otherwise false.
*/
public boolean isCloseStream() {
return tokens.isCloseStream();
}
/**
* Sets whether {@link #close()} method shall close input stream.
* The value is stored in the underlying {@link PdfTokenizer}.
*
* @param closeStream true, if {@link #close()} method shall close input stream,
* otherwise false.
*/
public void setCloseStream(boolean closeStream) {
tokens.setCloseStream(closeStream);
}
/**
 * Tells whether the Cross-Reference section was rebuilt. PdfReader tries to rebuild it
 * if any exception is generated while reading the XRef section.
 *
 * @return true, if PdfReader rebuilt Cross-Reference section.
 * @throws PdfException if the method has been invoked before the PDF document was read.
 */
public boolean hasRebuiltXref() {
    final boolean documentRead = pdfDocument != null && pdfDocument.getXref().isReadingCompleted();
    if (!documentRead) {
        throw new PdfException(KernelExceptionMessageConstant.DOCUMENT_HAS_NOT_BEEN_READ_YET);
    }
    return rebuiltXref;
}
/**
 * Tells whether the document has a hybrid XRef. For more information see "7.5.8.4 Compatibility
 * with Applications That Do Not Support Compressed Reference Streams" in PDF 32000-1:2008 spec.
 *
 * @return true, if the document has hybrid Cross-Reference section.
 * @throws PdfException if the method has been invoked before the PDF document was read.
 */
public boolean hasHybridXref() {
    final boolean documentRead = pdfDocument != null && pdfDocument.getXref().isReadingCompleted();
    if (!documentRead) {
        throw new PdfException(KernelExceptionMessageConstant.DOCUMENT_HAS_NOT_BEEN_READ_YET);
    }
    return hybridXref;
}
/**
 * Tells whether the document has Cross-Reference Streams.
 *
 * @return true, if the document has Cross-Reference Streams.
 * @throws PdfException if the method has been invoked before the PDF document was read.
 */
public boolean hasXrefStm() {
    final boolean documentRead = pdfDocument != null && pdfDocument.getXref().isReadingCompleted();
    if (!documentRead) {
        throw new PdfException(KernelExceptionMessageConstant.DOCUMENT_HAS_NOT_BEEN_READ_YET);
    }
    return xrefStm;
}
/**
 * Tells whether PdfReader fixed the offsets of objects. It tries to do so if any exception
 * is generated while reading a PdfObject.
 * <p>
 * This method's returned value might change over time, because PdfObjects reading
 * can be postponed even up to document closing.
 *
 * @return true, if PdfReader fixed offsets of PdfObjects.
 * @throws PdfException if the method has been invoked before the PDF document was read.
 */
public boolean hasFixedXref() {
    final boolean documentRead = pdfDocument != null && pdfDocument.getXref().isReadingCompleted();
    if (!documentRead) {
        throw new PdfException(KernelExceptionMessageConstant.DOCUMENT_HAS_NOT_BEEN_READ_YET);
    }
    return fixedXref;
}
/**
 * Returns the position of the last Cross-Reference table.
 *
 * @return -1 if Cross-Reference table has rebuilt, otherwise position of the last Cross-Reference table.
 * @throws PdfException if the method has been invoked before the PDF document was read.
 */
public long getLastXref() {
    final boolean documentRead = pdfDocument != null && pdfDocument.getXref().isReadingCompleted();
    if (!documentRead) {
        throw new PdfException(KernelExceptionMessageConstant.DOCUMENT_HAS_NOT_BEEN_READ_YET);
    }
    return lastXref;
}
/**
 * Reads and decrypts the bytes of a stream and, on request, decodes them.
 * Note, this method doesn't store actual bytes in any internal structures.
 *
 * @param stream a {@link PdfStream} stream instance to be read and optionally decoded.
 * @param decode true if to get decoded stream bytes, false if to leave it originally encoded.
 * @return byte[] array.
 * @throws IOException on error.
 */
public byte[] readStreamBytes(PdfStream stream, boolean decode) throws IOException {
    final byte[] rawBytes = readStreamBytesRaw(stream);
    // Nothing to decode when decoding was not requested or no bytes were read.
    if (!decode || rawBytes == null) {
        return rawBytes;
    }
    return decodeBytes(rawBytes, stream);
}
/**
 * Reads the raw (still encoded) bytes of a stream and decrypts them when decryption applies.
 * Note, this method doesn't store actual bytes in any internal structures.
 *
 * @param stream a {@link PdfStream} stream instance to be read
 * @return byte[] array; {@code null} if the stream offset is not positive, an empty array if
 *         the stream length is not positive.
 * @throws IOException on error.
 */
public byte[] readStreamBytesRaw(PdfStream stream) throws IOException {
    final PdfName streamType = stream.getAsName(PdfName.Type);
    final boolean xrefOrObjectStream = PdfName.XRef.equals(streamType) || PdfName.ObjStm.equals(streamType);
    if (!xrefOrObjectStream) {
        checkPdfStreamLength(stream);
    }
    final long offset = stream.getOffset();
    if (offset <= 0) {
        return null;
    }
    final int length = stream.getLength();
    if (length <= 0) {
        return new byte[0];
    }
    final RandomAccessFileOrArray file = tokens.getSafeFile();
    byte[] content = null;
    try {
        file.seek(offset);
        content = new byte[length];
        file.readFully(content);
        final boolean embeddedStream = pdfDocument.doesStreamBelongToEmbeddedFile(stream);
        final boolean decryptionApplies = decrypt != null && (!decrypt.isEmbeddedFilesOnly() || embeddedStream);
        if (decryptionApplies) {
            // Streams that carry a Crypt filter are not decrypted here.
            boolean hasCryptFilter = false;
            final PdfObject filter = stream.get(PdfName.Filter, true);
            if (filter != null) {
                if (filter.isFlushed()) {
                    IndirectFilterUtils.throwFlushedFilterException(stream);
                }
                if (PdfName.Crypt.equals(filter)) {
                    hasCryptFilter = true;
                } else if (filter.getType() == PdfObject.ARRAY) {
                    final PdfArray filterArray = (PdfArray) filter;
                    for (int i = 0; i < filterArray.size(); i++) {
                        if (filterArray.get(i).isFlushed()) {
                            IndirectFilterUtils.throwFlushedFilterException(stream);
                        }
                        // The array is never empty inside this loop, so no emptiness check is needed.
                        if (PdfName.Crypt.equals(filterArray.get(i, true))) {
                            hasCryptFilter = true;
                            break;
                        }
                    }
                }
                filter.release();
            }
            if (!hasCryptFilter) {
                decrypt.setHashKeyForNextObject(stream.getIndirectReference().getObjNumber(),
                        stream.getIndirectReference().getGenNumber());
                content = decrypt.decryptByteArray(content);
            }
        }
    } finally {
        try {
            file.close();
        } catch (Exception ignored) {
            // Closing is best effort; a failure here must not hide the result or an earlier exception.
        }
    }
    return content;
}
/**
 * Reads, decrypts and optionally decodes stream bytes and wraps them into a {@link ByteArrayInputStream}.
 * User is responsible for closing returned stream.
 *
 * @param stream a {@link PdfStream} stream instance to be read
 * @param decode true if to get decoded stream, false if to leave it originally encoded.
 * @return InputStream or {@code null} if reading was failed.
 * @throws IOException on error.
 */
public InputStream readStream(PdfStream stream, boolean decode) throws IOException {
    final byte[] content = readStreamBytes(stream, decode);
    if (content == null) {
        return null;
    }
    return new ByteArrayInputStream(content);
}
/**
* Decode bytes applying the filters specified in the provided dictionary using default filter handlers.
* The handlers are taken from {@link FilterHandlers#getDefaultFilterHandlers()}.
*
* @param b the bytes to decode
* @param streamDictionary the dictionary that contains filter information
* @return the decoded bytes
* @throws PdfException if there are any problems decoding the bytes
*/
public static byte[] decodeBytes(byte[] b, PdfDictionary streamDictionary) {
return decodeBytes(b, streamDictionary, FilterHandlers.getDefaultFilterHandlers());
}
/**
 * Decode a byte[] applying the filters specified in the provided dictionary using the provided filter handlers.
 * <p>
 * Filters come from the {@link PdfName#Filter} entry (a single name or an array) and are applied
 * in order. Decode parameters come from {@link PdfName#DecodeParms}, falling back to
 * {@link PdfName#DP} when the former is absent or is neither a dictionary nor an array.
 *
 * @param b                the bytes to decode
 * @param streamDictionary the dictionary that contains filter information
 * @param filterHandlers   the map used to look up a handler for each type of filter
 * @return the decoded bytes, or {@code null} if {@code b} is {@code null}
 * @throws PdfException if there are any problems decoding the bytes
 */
public static byte[] decodeBytes(byte[] b, PdfDictionary streamDictionary,
        Map<PdfName, IFilterHandler> filterHandlers) {
    if (b == null) {
        return null;
    }
    // Normalize /Filter to an array: a single name becomes a one-element array.
    PdfObject filter = streamDictionary.get(PdfName.Filter);
    PdfArray filters = new PdfArray();
    if (filter != null) {
        if (filter.getType() == PdfObject.NAME) {
            filters.add(filter);
        } else if (filter.getType() == PdfObject.ARRAY) {
            filters = ((PdfArray) filter);
        }
    }

    // The memory limits handler is only reachable for dictionaries that have an indirect reference.
    MemoryLimitsAwareHandler memoryLimitsAwareHandler = null;
    if (null != streamDictionary.getIndirectReference()) {
        memoryLimitsAwareHandler = streamDictionary.getIndirectReference().getDocument().memoryLimitsAwareHandler;
    }
    final boolean memoryLimitsAwarenessRequired = null != memoryLimitsAwareHandler &&
            memoryLimitsAwareHandler.isMemoryLimitsAwarenessRequiredOnDecompression(filters);
    if (memoryLimitsAwarenessRequired) {
        memoryLimitsAwareHandler.beginDecompressedPdfStreamProcessing();
    }

    // Normalize the decode parameters to an array in the same way as the filters.
    PdfArray dp = new PdfArray();
    PdfObject dpo = streamDictionary.get(PdfName.DecodeParms);
    if (dpo == null || (dpo.getType() != PdfObject.DICTIONARY && dpo.getType() != PdfObject.ARRAY)) {
        if (dpo != null) {
            dpo.release();
        }
        dpo = streamDictionary.get(PdfName.DP);
    }
    if (dpo != null) {
        if (dpo.getType() == PdfObject.DICTIONARY) {
            dp.add(dpo);
        } else if (dpo.getType() == PdfObject.ARRAY) {
            dp = ((PdfArray) dpo);
        }
        dpo.release();
    }

    for (int j = 0; j < filters.size(); ++j) {
        PdfName filterName = (PdfName) filters.get(j);
        IFilterHandler filterHandler = filterHandlers.get(filterName);
        if (filterHandler == null) {
            throw new PdfException(KernelExceptionMessageConstant.THIS_FILTER_IS_NOT_SUPPORTED)
                    .setMessageParams(filterName);
        }

        // The j-th decode parameter belongs to the j-th filter; a missing or null entry means "no parameters".
        PdfDictionary decodeParams;
        if (j < dp.size()) {
            PdfObject dpEntry = dp.get(j, true);
            if (dpEntry == null || dpEntry.getType() == PdfObject.NULL) {
                decodeParams = null;
            } else if (dpEntry.getType() == PdfObject.DICTIONARY) {
                decodeParams = (PdfDictionary) dpEntry;
            } else {
                throw new PdfException(KernelExceptionMessageConstant.THIS_DECODE_PARAMETER_TYPE_IS_NOT_SUPPORTED)
                        .setMessageParams(dpEntry.getClass().toString());
            }
        } else {
            decodeParams = null;
        }
        b = filterHandler.decode(b, filterName, decodeParams, streamDictionary);
        if (memoryLimitsAwarenessRequired) {
            memoryLimitsAwareHandler.considerBytesOccupiedByDecompressedPdfStream(b.length);
        }
    }
    if (memoryLimitsAwarenessRequired) {
        memoryLimitsAwareHandler.endDecompressedPdfStreamProcessing();
    }
    return b;
}
/**
* Gets a new file instance of the original PDF
* document, as provided by the underlying {@link PdfTokenizer}.
*
* @return a new file instance of the original PDF document
*/
public RandomAccessFileOrArray getSafeFile() {
return tokens.getSafeFile();
}
/**
* Provides the size of the opened file.
* The size is queried on a file instance obtained from the tokenizer's {@code getSafeFile()}.
*
* @return The size of the opened file.
*/
public long getFileLength() {
return tokens.getSafeFile().length();
}
/**
 * Checks if the document was opened with the owner password so that the end application
 * can decide what level of access restrictions to apply. If the document is not encrypted
 * it will return {@code true}.
 *
 * @return {@code true} if the document was opened with the owner password or if it's not encrypted,
 * {@code false} if the document was opened with the user password.
 * @throws PdfException if the method has been invoked before the PDF document was read.
 */
public boolean isOpenedWithFullPermission() {
    final boolean documentRead = pdfDocument != null && pdfDocument.getXref().isReadingCompleted();
    if (!documentRead) {
        throw new PdfException(KernelExceptionMessageConstant.DOCUMENT_HAS_NOT_BEEN_READ_YET);
    }
    if (!encrypted) {
        return true;
    }
    return decrypt.isOpenedWithFullPermission() || unethicalReading;
}
/**
 * Gets the encryption permissions. It can be used directly in
 * {@link WriterProperties#setStandardEncryption(byte[], byte[], int, int)}.
 * See ISO 32000-1, Table 22 for more details.
 *
 * @return the encryption permissions, an unsigned 32-bit quantity; {@code 0} when the document
 *         is not encrypted or no permissions are available.
 * @throws PdfException if the method has been invoked before the PDF document was read.
 */
public long getPermissions() {
    /* The "reading completed" check is sufficient for encryption properties as well: the decrypt
     * object is initialized in the private readDecryptObj method, which is called right after the
     * line that marks reading as completed, so users cannot observe the state in between.
     */
    final boolean documentRead = pdfDocument != null && pdfDocument.getXref().isReadingCompleted();
    if (!documentRead) {
        throw new PdfException(KernelExceptionMessageConstant.DOCUMENT_HAS_NOT_BEEN_READ_YET);
    }
    if (!encrypted || decrypt.getPermissions() == null) {
        return 0;
    }
    return (long) decrypt.getPermissions();
}
/**
 * Gets encryption algorithm and access permissions.
 *
 * @return {@code int} value corresponding to a certain type of encryption, or {@code -1}
 *         when there is no decryption handler.
 * @see EncryptionConstants
 * @throws PdfException if the method has been invoked before the PDF document was read.
 */
public int getCryptoMode() {
    final boolean documentRead = pdfDocument != null && pdfDocument.getXref().isReadingCompleted();
    if (!documentRead) {
        throw new PdfException(KernelExceptionMessageConstant.DOCUMENT_HAS_NOT_BEEN_READ_YET);
    }
    return decrypt == null ? -1 : decrypt.getCryptoMode();
}
/**
 * Gets the declared PDF/A conformance level of the source document that is being read.
 * Note that this information is provided via XMP metadata and is not verified by iText.
 * {@link PdfReader#pdfAConformanceLevel} is lazy initialized.
 * It will be initialized during the first call of this method.
 *
 * @return conformance level of the source document, or {@code null} if no PDF/A
 * conformance level information is specified.
 */
public PdfAConformanceLevel getPdfAConformanceLevel() {
    // Already resolved by an earlier call.
    if (pdfAConformanceLevel != null) {
        return pdfAConformanceLevel;
    }
    if (pdfDocument == null || !pdfDocument.getXref().isReadingCompleted()) {
        throw new PdfException(KernelExceptionMessageConstant.DOCUMENT_HAS_NOT_BEEN_READ_YET);
    }
    try {
        if (xmpMeta == null) {
            if (pdfDocument.getXmpMetadata() != null) {
                xmpMeta = XMPMetaFactory.parseFromBuffer(pdfDocument.getXmpMetadata());
            }
        }
        if (xmpMeta != null) {
            pdfAConformanceLevel = PdfAConformanceLevel.getConformanceLevel(xmpMeta);
        }
    } catch (XMPException ignored) {
        // XMP metadata that cannot be parsed leaves the conformance level unset.
    }
    return pdfAConformanceLevel;
}
/**
 * Computes user password if standard encryption handler is used with Standard40, Standard128 or AES128 encryption algorithm.
 *
 * @return user password, or null if not a standard encryption handler was used or if ownerPasswordUsed wasn't use to open the document.
 * @throws PdfException if the method has been invoked before the PDF document was read.
 */
public byte[] computeUserPassword() {
    final boolean documentRead = pdfDocument != null && pdfDocument.getXref().isReadingCompleted();
    if (!documentRead) {
        throw new PdfException(KernelExceptionMessageConstant.DOCUMENT_HAS_NOT_BEEN_READ_YET);
    }
    final boolean openedWithFullPermission = encrypted && decrypt.isOpenedWithFullPermission();
    return openedWithFullPermission ? decrypt.computeUserPassword(properties.password) : null;
}
/**
 * Gets original file ID, the first element in {@link PdfName#ID} key of trailer.
 * If the size of ID array does not equal 2, or its first element is not a string,
 * an empty array will be returned.
 * <p>
 * The returned value reflects the value that was written in opened document. If document is modified,
 * the ultimate document id can be retrieved from {@link PdfDocument#getOriginalDocumentId()}.
 *
 * @return byte array represents original file ID.
 * @see PdfDocument#getOriginalDocumentId()
 * @throws PdfException if the method has been invoked before the PDF document was read.
 */
public byte[] getOriginalFileId() {
    if (pdfDocument == null || !pdfDocument.getXref().isReadingCompleted()) {
        throw new PdfException(KernelExceptionMessageConstant.DOCUMENT_HAS_NOT_BEEN_READ_YET);
    }
    PdfArray id = trailer.getAsArray(PdfName.ID);
    if (id != null && id.size() == 2) {
        // A malformed /ID may hold a non-string entry; getAsString yields null for it.
        PdfString originalId = id.getAsString(0);
        if (originalId != null) {
            return ByteUtils.getIsoBytes(originalId.getValue());
        }
    }
    return new byte[0];
}
/**
 * Gets modified file ID, the second element in {@link PdfName#ID} key of trailer.
 * If the size of ID array does not equal 2, or its second element is not a string,
 * an empty array will be returned.
 * <p>
 * The returned value reflects the value that was written in opened document. If document is modified,
 * the ultimate document id can be retrieved from {@link PdfDocument#getModifiedDocumentId()}.
 *
 * @return byte array represents modified file ID.
 * @see PdfDocument#getModifiedDocumentId()
 * @throws PdfException if the method has been invoked before the PDF document was read.
 */
public byte[] getModifiedFileId() {
    if (pdfDocument == null || !pdfDocument.getXref().isReadingCompleted()) {
        throw new PdfException(KernelExceptionMessageConstant.DOCUMENT_HAS_NOT_BEEN_READ_YET);
    }
    PdfArray id = trailer.getAsArray(PdfName.ID);
    if (id != null && id.size() == 2) {
        // A malformed /ID may hold a non-string entry; getAsString yields null for it.
        PdfString modifiedId = id.getAsString(1);
        if (modifiedId != null) {
            return ByteUtils.getIsoBytes(modifiedId.getValue());
        }
    }
    return new byte[0];
}
/**
 * Tells whether the {@link PdfDocument} read with this {@link PdfReader} is encrypted.
 *
 * @return {@code true} is the document is encrypted, otherwise {@code false}.
 * @throws PdfException if the method has been invoked before the PDF document was read.
 */
public boolean isEncrypted() {
    final boolean documentRead = pdfDocument != null && pdfDocument.getXref().isReadingCompleted();
    if (!documentRead) {
        throw new PdfException(KernelExceptionMessageConstant.DOCUMENT_HAS_NOT_BEEN_READ_YET);
    }
    return encrypted;
}
/**
 * Parses the entire PDF: checks the header version, reads the cross-reference data (rebuilding
 * it when reading fails and the strictness level allows), marks xref reading as completed and
 * reads the decryption object.
 *
 * @throws IOException if an I/O error occurs.
 */
protected void readPdf() throws IOException {
    final String headerVersion = tokens.checkPdfHeader();
    try {
        headerPdfVersion = PdfVersion.fromString(headerVersion);
    } catch (IllegalArgumentException invalidVersion) {
        throw new PdfException(KernelExceptionMessageConstant.PDF_VERSION_IS_NOT_VALID, headerVersion);
    }
    try {
        readXref();
    } catch (XrefCycledReferencesException | MemoryLimitsAwareException | InvalidXRefPrevException fatal) {
        // Throws an exception when xref stream has cycled references(due to lack of opportunity to fix such an
        // issue) or xref tables have cycled references and PdfReader.StrictnessLevel set to CONSERVATIVE.
        // Also throw an exception when xref structure size exceeds jvm memory limit.
        throw fatal;
    } catch (RuntimeException xrefFailure) {
        if (!StrictnessLevel.CONSERVATIVE.isStricter(getStrictnessLevel())) {
            throw xrefFailure;
        }
        LoggerFactory.getLogger(PdfReader.class)
                .error(IoLogMessageConstant.XREF_ERROR_WHILE_READING_TABLE_WILL_BE_REBUILT, xrefFailure);
        rebuildXref();
    }
    pdfDocument.getXref().markReadingCompleted();
    readDecryptObj();
}
/**
 * Reads the objects stored in an object stream and attaches them to their indirect references.
 * While the stream content is parsed, {@code tokens} points at a tokenizer over the decoded
 * stream bytes; the original tokenizer is restored afterwards.
 *
 * @param objectStream the object stream to read
 * @throws IOException on error
 */
protected void readObjectStream(PdfStream objectStream) throws IOException {
    final int objectStreamNumber = objectStream.getIndirectReference().getObjNumber();
    final int first = objectStream.getAsNumber(PdfName.First).intValue();
    final int n = objectStream.getAsNumber(PdfName.N).intValue();
    final byte[] decodedBytes = readStreamBytes(objectStream, true);
    final PdfTokenizer documentTokens = tokens;
    try {
        tokens = new PdfTokenizer(
                new RandomAccessFileOrArray(new RandomAccessSourceFactory().createSource(decodedBytes)));
        final int[] address = new int[n];
        final int[] objNumber = new int[n];
        // The stream starts with n pairs of numbers: object number and offset relative to /First.
        boolean pairsValid = true;
        for (int i = 0; i < n; ++i) {
            pairsValid = tokens.nextToken() && tokens.getTokenType() == PdfTokenizer.TokenType.Number;
            if (!pairsValid) {
                break;
            }
            objNumber[i] = tokens.getIntValue();
            pairsValid = tokens.nextToken() && tokens.getTokenType() == PdfTokenizer.TokenType.Number;
            if (!pairsValid) {
                break;
            }
            address[i] = tokens.getIntValue() + first;
        }
        if (!pairsValid) {
            throw new PdfException(KernelExceptionMessageConstant.ERROR_WHILE_READING_OBJECT_STREAM);
        }
        for (int i = 0; i < n; ++i) {
            tokens.seek(address[i]);
            tokens.nextToken();
            final PdfIndirectReference reference = pdfDocument.getXref().get(objNumber[i]);
            // We read element i of the objects stream only if it is not yet available in xref
            // and the corresponding indirect object reference points to this object stream.
            // The first check prevents from re-initializing objects which are already read. One of the cases
            // when this can happen is that some other object from this objects stream was released and requested
            // to be re-read.
            // Second check ensures that object has no incremental updates and is not freed in append mode.
            if (reference.refersTo == null && reference.getObjStreamNumber() == objectStreamNumber) {
                final PdfObject obj;
                if (tokens.getTokenType() == PdfTokenizer.TokenType.Number) {
                    // This ensure that we don't even try to read as indirect reference token (two numbers and "R")
                    // which are forbidden in object streams.
                    obj = new PdfNumber(tokens.getByteContent());
                } else {
                    tokens.seek(address[i]);
                    obj = readObject(false, true);
                }
                reference.setRefersTo(obj);
                obj.setIndirectReference(reference);
            }
        }
        objectStream.getIndirectReference().setState(PdfObject.ORIGINAL_OBJECT_STREAM);
    } finally {
        tokens = documentTokens;
    }
}
/**
* Reads the object the given indirect reference points to.
* Delegates to the two-argument {@code readObject(reference, true)} overload, which is defined
* outside this section; NOTE(review): confirm the meaning of the {@code true} flag there.
*
* @param reference the indirect reference of the object to read
* @return the result of the delegated call
*/
protected PdfObject readObject(PdfIndirectReference reference) {
return readObject(reference, true);
}
/**
* Reads the next object from the tokenizer.
* Delegates to {@code readObject(readAsDirect, false)}, i.e. with {@code objStm} set to {@code false}.
*
* @param readAsDirect forwarded to the delegated call
* @return the object that was read
* @throws IOException on error
*/
protected PdfObject readObject(boolean readAsDirect) throws IOException {
return readObject(readAsDirect, false);
}
/**
 * Resolves the indirect reference token the tokenizer currently points at against the xref table.
 * <p>
 * A PdfNull instance (see {@code createPdfNullInstance}) is returned when the object number is
 * negative, when the reference is free, when the generation numbers differ and the xref was
 * fixed, or when the reference is unknown and xref reading has already completed. An unknown
 * reference met before xref reading has completed is added to the table in READING state.
 *
 * @param readAsDirect forwarded to {@code createPdfNullInstance} when a null object is returned
 * @return the indirect reference, or a PdfNull instance as described above
 * @throws PdfException if the generation numbers differ and the xref was not fixed
 */
protected PdfObject readReference(boolean readAsDirect) {
    final int num = tokens.getObjNr();
    if (num < 0) {
        return createPdfNullInstance(readAsDirect);
    }
    final PdfXrefTable table = pdfDocument.getXref();
    final PdfIndirectReference reference = table.get(num);

    if (reference == null) {
        if (!table.isReadingCompleted()) {
            return table.add((PdfIndirectReference) new PdfIndirectReference(pdfDocument,
                    num, tokens.getGenNr(), 0).setState(PdfObject.READING));
        }
        LoggerFactory.getLogger(PdfReader.class).warn(MessageFormatUtil.format(
                IoLogMessageConstant.INVALID_INDIRECT_REFERENCE, tokens.getObjNr(), tokens.getGenNr()));
        return createPdfNullInstance(readAsDirect);
    }

    if (reference.isFree()) {
        LoggerFactory.getLogger(PdfReader.class).warn(MessageFormatUtil.format(
                IoLogMessageConstant.INVALID_INDIRECT_REFERENCE, tokens.getObjNr(), tokens.getGenNr()));
        return createPdfNullInstance(readAsDirect);
    }

    if (reference.getGenNumber() == tokens.getGenNr()) {
        return reference;
    }

    // Generation mismatch: tolerated only when the xref has been fixed.
    if (!fixedXref) {
        throw new PdfException(KernelExceptionMessageConstant.INVALID_INDIRECT_REFERENCE,
                MessageFormatUtil.format("{0} {1} R", reference.getObjNumber(), reference.getGenNumber()));
    }
    LoggerFactory.getLogger(PdfReader.class).warn(MessageFormatUtil.format(
            IoLogMessageConstant.INVALID_INDIRECT_REFERENCE, tokens.getObjNr(), tokens.getGenNr()));
    return createPdfNullInstance(readAsDirect);
}
/**
 * Reads the next object from the tokenizer, dispatching on the type of the next valid token.
 *
 * @param readAsDirect passed on to name, reference, null and boolean creation; for booleans
 *                     {@code true} returns the shared {@link PdfBoolean} constants
 * @param objStm       passed on to nested dictionary/array reading; when {@code true},
 *                     strings are not set up for decryption
 * @return the object read, or {@code null} if the token does not start a known object
 * @throws IOException on error
 * @throws PdfException on unexpected end of file
 */
protected PdfObject readObject(boolean readAsDirect, boolean objStm) throws IOException {
    tokens.nextValidToken();
    switch (tokens.getTokenType()) {
        case StartDic: {
            final PdfDictionary dictionary = readDictionary(objStm);
            final long afterDictionary = tokens.getPosition();
            // be careful in the trailer. May not be a "next" token.
            boolean tokenAvailable = tokens.nextToken();
            while (tokenAvailable && tokens.getTokenType() == PdfTokenizer.TokenType.Comment) {
                tokenAvailable = tokens.nextToken();
            }
            if (!tokenAvailable || !tokens.tokenValueEqualsTo(PdfTokenizer.Stream)) {
                // Plain dictionary: rewind to just after it.
                tokens.seek(afterDictionary);
                return dictionary;
            }
            //skip whitespaces
            int ch = tokens.read();
            while (ch == 32 || ch == 9 || ch == 0 || ch == 12) {
                ch = tokens.read();
            }
            if (ch != '\n') {
                ch = tokens.read();
            }
            if (ch != '\n') {
                tokens.backOnePosition(ch);
            }
            final PdfStream pdfStream = new PdfStream(tokens.getPosition(), dictionary);
            tokens.seek(pdfStream.getOffset() + pdfStream.getLength());
            return pdfStream;
        }
        case StartArray:
            return readArray(objStm);
        case Number:
            return new PdfNumber(tokens.getByteContent());
        case String: {
            final PdfString pdfString = new PdfString(tokens.getByteContent(), tokens.isHexString());
            if (encrypted && !decrypt.isEmbeddedFilesOnly() && !objStm) {
                pdfString.setDecryption(currentIndirectReference.getObjNumber(),
                        currentIndirectReference.getGenNumber(), decrypt);
            }
            return pdfString;
        }
        case Name:
            return readPdfName(readAsDirect);
        case Ref:
            return readReference(readAsDirect);
        case EndOfFile:
            throw new PdfException(KernelExceptionMessageConstant.UNEXPECTED_END_OF_FILE);
        default:
            if (tokens.tokenValueEqualsTo(PdfTokenizer.Null)) {
                return createPdfNullInstance(readAsDirect);
            }
            if (tokens.tokenValueEqualsTo(PdfTokenizer.True)) {
                return readAsDirect ? PdfBoolean.TRUE : new PdfBoolean(true);
            }
            if (tokens.tokenValueEqualsTo(PdfTokenizer.False)) {
                return readAsDirect ? PdfBoolean.FALSE : new PdfBoolean(false);
            }
            return null;
    }
}
/**
 * Creates a {@link PdfName} from the current token.
 *
 * @param readAsDirect if {@code true}, a cached instance from {@code PdfName.staticNames}
 *                     is returned when one exists for the token's string value
 * @return the cached name, or a new {@link PdfName} built from the token bytes
 */
protected PdfName readPdfName(boolean readAsDirect) {
    final PdfName cachedName = readAsDirect ? PdfName.staticNames.get(tokens.getStringValue()) : null;
    // an indirect name (how odd...), or a non-standard one
    return cachedName != null ? cachedName : new PdfName(tokens.getByteContent());
}
/**
 * Reads key/value pairs from the tokenizer until the end-of-dictionary token is met.
 *
 * @param objStm forwarded unchanged to {@code readObject(boolean, boolean)} for every value
 * @return the dictionary populated with every pair that was read
 * @throws IOException if reading from the underlying tokenizer fails
 */
protected PdfDictionary readDictionary(boolean objStm) throws IOException {
    final PdfDictionary result = new PdfDictionary();
    for (tokens.nextValidToken();
            tokens.getTokenType() != PdfTokenizer.TokenType.EndDic;
            tokens.nextValidToken()) {
        // Every key has to be a name token.
        if (tokens.getTokenType() != PdfTokenizer.TokenType.Name) {
            tokens.throwError(
                    KernelExceptionMessageConstant.THIS_DICTIONARY_KEY_IS_NOT_A_NAME, tokens.getStringValue());
        }
        final PdfName key = readPdfName(true);
        final PdfObject value = readObject(true, objStm);
        if (value == null) {
            // A closing delimiter appeared where a value was expected.
            String strayDelimiter = null;
            if (tokens.getTokenType() == PdfTokenizer.TokenType.EndDic) {
                strayDelimiter = ">>";
            } else if (tokens.getTokenType() == PdfTokenizer.TokenType.EndArray) {
                strayDelimiter = "]";
            }
            if (strayDelimiter != null) {
                tokens.throwError(MessageFormatUtil.
                        format(KernelExceptionMessageConstant.UNEXPECTED_TOKEN, strayDelimiter));
            }
        }
        result.put(key, value);
    }
    return result;
}
/**
 * Reads array elements from the tokenizer until {@code readObject} yields {@code null}.
 * If the token that stopped the loop is not the end-of-array token, the situation is
 * delegated to {@code processArrayReadError()}.
 *
 * @param objStm forwarded unchanged to {@code readObject(boolean, boolean)} for every element
 * @return the array with all elements read so far
 * @throws IOException if reading from the underlying tokenizer fails
 */
protected PdfArray readArray(boolean objStm) throws IOException {
    final PdfArray elements = new PdfArray();
    PdfObject element;
    while ((element = readObject(true, objStm)) != null) {
        elements.add(element);
    }
    if (tokens.getTokenType() != PdfTokenizer.TokenType.EndArray) {
        processArrayReadError();
    }
    return elements;
}
/**
 * Reads the cross-reference information of the document.
 *
 * <p>
 * The method positions the tokenizer at the {@code startxref} keyword, reads the offset that
 * follows it (stored in {@code lastXref}) and remembers the tokenizer position after that
 * number in {@code eofPos}. It first tries to read a cross-reference stream at that offset;
 * if that does not succeed, the xref table is cleared and a classic cross-reference section is
 * read instead, following the chain of {@code Prev} entries of the trailers.
 *
 * @throws IOException if reading from the underlying tokenizer fails
 * @throws PdfException if {@code startxref} is missing or not followed by a number, if the
 *                      {@code Prev} chain is cyclic, or if the trailer has no {@code Size}
 */
protected void readXref() throws IOException {
    tokens.seek(tokens.getStartxref());
    tokens.nextToken();
    if (!tokens.tokenValueEqualsTo(PdfTokenizer.Startxref)) {
        throw new PdfException(KernelExceptionMessageConstant.PDF_STARTXREF_NOT_FOUND, tokens);
    }
    tokens.nextToken();
    if (tokens.getTokenType() != PdfTokenizer.TokenType.Number) {
        throw new PdfException(KernelExceptionMessageConstant.PDF_STARTXREF_IS_NOT_FOLLOWED_BY_A_NUMBER, tokens);
    }
    long startxref = tokens.getLongValue();
    lastXref = startxref;
    eofPos = tokens.getPosition();
    try {
        if (readXrefStream(startxref)) {
            xrefStm = true;
            return;
        }
    } catch (XrefCycledReferencesException
            | MemoryLimitsAwareException
            | InvalidXRefPrevException exceptionWhileReadingXrefStream) {
        // These failures are propagated as is rather than triggering the table fallback.
        throw exceptionWhileReadingXrefStream;
    } catch (Exception ignored) {
        // Any other failure means the data is not a usable xref stream:
        // fall through and try to read a classic cross-reference table.
    }
    // clear xref because of possible issues at reading xref stream.
    pdfDocument.getXref().clear();
    tokens.seek(startxref);
    trailer = readXrefSection();
    // Prev key - integer value.
    // (Present only if the file has more than one cross-reference section; getXrefPrev expects
    // a direct number and only tolerates an indirect reference in lenient reading modes).
    // The byte offset from the beginning of the file
    // to the beginning of the previous cross-reference section.
    PdfDictionary trailer2 = trailer;
    // Offsets of the sections already read, used to detect a cyclic Prev chain.
    final Set<Long> alreadyVisitedXrefTables = new HashSet<>();
    while (true) {
        alreadyVisitedXrefTables.add(startxref);
        PdfNumber prev = getXrefPrev(trailer2.get(PdfName.Prev, false));
        if (prev == null) {
            break;
        }
        long prevXrefOffset = prev.longValue();
        if (alreadyVisitedXrefTables.contains(prevXrefOffset)) {
            if (StrictnessLevel.CONSERVATIVE.isStricter(this.getStrictnessLevel())) {
                // Throw the exception to rebuild xref table; it is expected to be caught by the caller.
                throw new PdfException(KernelExceptionMessageConstant.
                        TRAILER_PREV_ENTRY_POINTS_TO_ITS_OWN_CROSS_REFERENCE_SECTION);
            } else {
                throw new XrefCycledReferencesException(
                        KernelExceptionMessageConstant.XREF_TABLE_HAS_CYCLED_REFERENCES);
            }
        }
        startxref = prevXrefOffset;
        tokens.seek(startxref);
        trailer2 = readXrefSection();
    }
    // The trailer of the most recent section must declare the table size.
    Integer xrefSize = trailer.getAsInt(PdfName.Size);
    if (xrefSize == null) {
        throw new PdfException(KernelExceptionMessageConstant.INVALID_XREF_TABLE);
    }
}
/**
 * Reads one classic cross-reference section (the {@code xref} keyword, its subsections and the
 * trailer dictionary that follows) starting at the current tokenizer position, and registers
 * the entries in the document's xref table.
 *
 * <p>
 * If the trailer contains a numeric {@code XRefStm} entry, the cross-reference stream at that
 * offset is read as well and the reader is flagged as using a hybrid xref.
 *
 * @return the trailer dictionary that follows the section
 * @throws IOException if reading from the underlying tokenizer fails
 */
protected PdfDictionary readXrefSection() throws IOException {
    tokens.nextValidToken();
    if (!tokens.tokenValueEqualsTo(PdfTokenizer.Xref)) {
        tokens.throwError(KernelExceptionMessageConstant.XREF_SUBSECTION_NOT_FOUND);
    }
    PdfXrefTable xref = pdfDocument.getXref();
    while (true) {
        tokens.nextValidToken();
        if (tokens.tokenValueEqualsTo(PdfTokenizer.Trailer)) {
            break;
        }
        // Subsection header: <first object number> <number of entries>
        if (tokens.getTokenType() != PdfTokenizer.TokenType.Number) {
            tokens.throwError(
                    KernelExceptionMessageConstant.OBJECT_NUMBER_OF_THE_FIRST_OBJECT_IN_THIS_XREF_SUBSECTION_NOT_FOUND);
        }
        int start = tokens.getIntValue();
        tokens.nextValidToken();
        if (tokens.getTokenType() != PdfTokenizer.TokenType.Number) {
            tokens.throwError(KernelExceptionMessageConstant.NUMBER_OF_ENTRIES_IN_THIS_XREF_SUBSECTION_NOT_FOUND);
        }
        int end = tokens.getIntValue() + start;
        for (int num = start; num < end; num++) {
            // Entry: <offset> <generation> <n|f>; the third token is inspected further below.
            tokens.nextValidToken();
            long pos = tokens.getLongValue();
            tokens.nextValidToken();
            int gen = tokens.getIntValue();
            tokens.nextValidToken();
            if (pos == 0L && gen == 65535 && num == 1 && start != 0) {
                // Very rarely can an XREF have an incorrect start number. (SUP-1557)
                // e.g.
                // xref
                // 1 13
                // 0000000000 65535 f
                // 0000000009 00000 n
                // 0000215136 00000 n
                // [...]
                // Because of how iText reads (and initializes) the XREF, this will lead to the XREF having two 0000 65535 entries.
                // This throws off the parsing and other operations you'd like to perform.
                // To fix this we reset our index and decrease the limit when we've encountered the magic entry at position 1.
                num = 0;
                end--;
                continue;
            }
            PdfIndirectReference reference = xref.get(num);
            boolean refReadingState = reference != null && reference.checkState(PdfObject.READING) && reference.getGenNumber() == gen;
            // for references that are added by xref table itself (like 0 entry)
            boolean refFirstEncountered = reference == null
                    || !refReadingState && reference.getDocument() == null;
            if (refFirstEncountered) {
                reference = new PdfIndirectReference(pdfDocument, num, gen, pos);
            } else if (refReadingState) {
                reference.setOffset(pos);
                reference.clearState(PdfObject.READING);
            } else {
                // The object number is already registered: keep the existing entry.
                continue;
            }
            if (tokens.tokenValueEqualsTo(PdfTokenizer.N)) {
                if (pos == 0) {
                    tokens.throwError(
                            KernelExceptionMessageConstant.FILE_POSITION_0_CROSS_REFERENCE_ENTRY_IN_THIS_XREF_SUBSECTION);
                }
            } else if (tokens.tokenValueEqualsTo(PdfTokenizer.F)) {
                if (refFirstEncountered) {
                    reference.setState(PdfObject.FREE);
                }
            } else {
                tokens.throwError(
                        KernelExceptionMessageConstant.INVALID_CROSS_REFERENCE_ENTRY_IN_THIS_XREF_SUBSECTION);
            }
            if (refFirstEncountered) {
                xref.add(reference);
            }
        }
    }
    processXref(xref);
    PdfDictionary trailer = (PdfDictionary) readObject(false);
    PdfObject xrs = trailer.get(PdfName.XRefStm);
    if (xrs != null && xrs.getType() == PdfObject.NUMBER) {
        // XRefStm is a byte offset into the file: read it as a long so that offsets beyond
        // Integer.MAX_VALUE are not truncated before being passed to readXrefStream(long).
        long loc = ((PdfNumber) xrs).longValue();
        try {
            readXrefStream(loc);
            xrefStm = true;
            hybridXref = true;
        } catch (IOException e) {
            xref.clear();
            throw e;
        }
    }
    return trailer;
}
/**
 * Reads the cross-reference stream located at the given offset and every earlier one reachable
 * through its {@code Prev} entry, registering the decoded entries in the document's xref table.
 *
 * <p>
 * The first stream that is read also initializes {@code trailer} (when it is still
 * {@code null}) with a copy of the stream dictionary without the {@code DecodeParms},
 * {@code Filter}, {@code Prev} and {@code Length} keys.
 *
 * @param ptr byte offset of the indirect object expected to hold the cross-reference stream
 * @return {@code true} if the whole chain was read; {@code false} as soon as the data at the
 *         current offset is not an {@code <num> <gen> obj} header followed by a stream whose
 *         {@code Type} is {@code XRef}
 * @throws IOException if reading from the underlying tokenizer fails
 * @throws XrefCycledReferencesException if a {@code Prev} entry points to a stream already read
 * @throws PdfException if an entry has a type other than 0, 1 or 2
 */
protected boolean readXrefStream(long ptr) throws IOException {
    // Offsets of the streams already read, used to detect a cyclic Prev chain.
    final Set<Long> alreadyVisitedXrefStreams = new HashSet<>();
    while (ptr != -1) {
        tokens.seek(ptr);
        // Expect the indirect object header: <number> <number> obj
        if (!tokens.nextToken()) {
            return false;
        }
        if (tokens.getTokenType() != PdfTokenizer.TokenType.Number) {
            return false;
        }
        if (!tokens.nextToken() || tokens.getTokenType() != PdfTokenizer.TokenType.Number) {
            return false;
        }
        if (!tokens.nextToken() || !tokens.tokenValueEqualsTo(PdfTokenizer.Obj)) {
            return false;
        }
        alreadyVisitedXrefStreams.add(ptr);
        PdfXrefTable xref = pdfDocument.getXref();
        PdfObject object = readObject(false);
        PdfStream xrefStream;
        if (object.getType() == PdfObject.STREAM) {
            xrefStream = (PdfStream) object;
            if (!PdfName.XRef.equals(xrefStream.get(PdfName.Type))) {
                return false;
            }
        } else {
            return false;
        }
        if (trailer == null) {
            trailer = new PdfDictionary();
            trailer.putAll(xrefStream);
            trailer.remove(PdfName.DecodeParms);
            trailer.remove(PdfName.Filter);
            trailer.remove(PdfName.Prev);
            trailer.remove(PdfName.Length);
        }
        int size = ((PdfNumber) xrefStream.get(PdfName.Size)).intValue();
        // Index holds pairs of <first object number, entry count>; defaults to [0 Size].
        PdfArray index;
        PdfObject obj = xrefStream.get(PdfName.Index);
        if (obj == null) {
            index = new PdfArray();
            index.add(new PdfNumber(0));
            index.add(new PdfNumber(size));
        } else {
            index = (PdfArray) obj;
        }
        PdfArray w = xrefStream.getAsArray(PdfName.W);
        long prev = -1;
        obj = getXrefPrev(xrefStream.get(PdfName.Prev, false));
        if (obj != null) {
            prev = ((PdfNumber) obj).longValue();
        }
        xref.setCapacity(size);
        byte[] b = readStreamBytes(xrefStream, true);
        int bptr = 0;
        // Byte widths of the three fields of every entry.
        int[] wc = new int[3];
        for (int k = 0; k < 3; ++k) {
            wc[k] = w.getAsNumber(k).intValue();
        }
        for (int idx = 0; idx < index.size(); idx += 2) {
            int start = index.getAsNumber(idx).intValue();
            int length = index.getAsNumber(idx + 1).intValue();
            xref.setCapacity(start + length);
            while (length-- > 0) {
                // Each field is a big-endian unsigned integer; the type defaults to 1
                // when the first field has zero width.
                int type = 1;
                if (wc[0] > 0) {
                    type = 0;
                    for (int k = 0; k < wc[0]; ++k) {
                        type = (type << 8) + (b[bptr++] & 0xff);
                    }
                }
                long field2 = 0;
                for (int k = 0; k < wc[1]; ++k) {
                    field2 = (field2 << 8) + (b[bptr++] & 0xff);
                }
                int field3 = 0;
                for (int k = 0; k < wc[2]; ++k) {
                    field3 = (field3 << 8) + (b[bptr++] & 0xff);
                }
                int base = start;
                PdfIndirectReference newReference;
                switch (type) {
                    case 0:
                        // Free entry.
                        newReference = (PdfIndirectReference) new PdfIndirectReference(pdfDocument, base, field3, field2).setState(PdfObject.FREE);
                        break;
                    case 1:
                        // Regular entry: field2 is used as the offset, field3 as the generation.
                        newReference = new PdfIndirectReference(pdfDocument, base, field3, field2);
                        break;
                    case 2:
                        // Entry inside an object stream: field2 is the object stream number,
                        // field3 is stored in the reference's offset slot.
                        newReference = new PdfIndirectReference(pdfDocument, base, 0, field3);
                        newReference.setObjStreamNumber((int) field2);
                        break;
                    default:
                        throw new PdfException(KernelExceptionMessageConstant.INVALID_XREF_STREAM);
                }
                PdfIndirectReference reference = xref.get(base);
                boolean refReadingState = reference != null && reference.checkState(PdfObject.READING) && reference.getGenNumber() == newReference.getGenNumber();
                // for references that are added by xref table itself (like 0 entry)
                boolean refFirstEncountered = reference == null
                        || !refReadingState && reference.getDocument() == null;
                if (refFirstEncountered) {
                    xref.add(newReference);
                } else if (refReadingState) {
                    reference.setOffset(newReference.getOffset());
                    reference.setObjStreamNumber(newReference.getObjStreamNumber());
                    reference.clearState(PdfObject.READING);
                }
                ++start;
            }
        }
        processXref(xref);
        ptr = prev;
        if (alreadyVisitedXrefStreams.contains(ptr)) {
            throw new XrefCycledReferencesException(
                    KernelExceptionMessageConstant.XREF_STREAM_HAS_CYCLED_REFERENCES);
        }
    }
    return true;
}
/**
 * Corrects the offsets stored in the xref table by scanning the file from the beginning.
 *
 * <p>
 * Every line that starts with a digit is checked for an object header; when the xref table
 * already holds a reference with the same object and generation numbers, its offset is fixed
 * to the position where that line starts. Sets {@code fixedXref} to {@code true}.
 *
 * @throws IOException if reading from the underlying tokenizer fails
 */
protected void fixXref() throws IOException {
    fixedXref = true;
    PdfXrefTable xref = pdfDocument.getXref();
    tokens.seek(0);
    ByteBuffer buffer = new ByteBuffer(24);
    // The helper tokenizer reads from the reusable line buffer; close it when done,
    // in the same way rebuildXref() does.
    try (PdfTokenizer lineTokenizer = new PdfTokenizer(
            new RandomAccessFileOrArray(new ReusableRandomAccessSource(buffer)))) {
        for (; ; ) {
            long pos = tokens.getPosition();
            buffer.reset();
            // added boolean because of mailing list issue (17 Feb. 2014)
            if (!tokens.readLineSegment(buffer, true)) {
                break;
            }
            if (buffer.get(0) >= '0' && buffer.get(0) <= '9') {
                int[] obj = PdfTokenizer.checkObjectStart(lineTokenizer);
                if (obj == null) {
                    continue;
                }
                int num = obj[0];
                int gen = obj[1];
                PdfIndirectReference reference = xref.get(num);
                if (reference != null && reference.getGenNumber() == gen) {
                    reference.fixOffset(pos);
                }
            }
        }
    }
}
/**
 * Rebuilds the xref table from scratch by scanning the file line by line.
 *
 * <p>
 * The xref flags are reset, the table is cleared and {@code trailer} is set to {@code null}.
 * Lines starting with a digit are checked for an object header and registered in the table;
 * lines starting with {@code 't'} are checked for a trailer whose dictionary has a
 * {@code Root} entry. The trailer itself is assigned only after the whole file was scanned.
 *
 * @throws IOException if reading from the underlying tokenizer fails
 */
protected void rebuildXref() throws IOException {
    xrefStm = false;
    hybridXref = false;
    rebuiltXref = true;
    final PdfXrefTable table = pdfDocument.getXref();
    table.clear();
    tokens.seek(0);
    trailer = null;
    final ByteBuffer lineBytes = new ByteBuffer(24);
    try (PdfTokenizer lineTokenizer = new PdfTokenizer(
            new RandomAccessFileOrArray(new ReusableRandomAccessSource(lineBytes)))) {
        Long trailerOffset = null;
        while (true) {
            final long lineStart = tokens.getPosition();
            lineBytes.reset();
            // added boolean because of mailing list issue (17 Feb. 2014)
            if (!tokens.readLineSegment(lineBytes, true)) {
                break;
            }
            final int firstByte = lineBytes.get(0);
            if (firstByte == 't') {
                if (PdfTokenizer.checkTrailer(lineBytes)) {
                    tokens.seek(lineStart);
                    tokens.nextToken();
                    final long afterKeyword = tokens.getPosition();
                    if (isCurrentObjectATrailer()) {
                        // In a linearized pdf the trailer may be read before the objects it
                        // refers to, leaving those references in READING state. Only remember
                        // where the trailer is and assign it once the whole pdf was loaded.
                        trailerOffset = afterKeyword;
                    } else {
                        tokens.seek(afterKeyword);
                    }
                }
            } else if (firstByte >= '0' && firstByte <= '9') {
                final int[] header = PdfTokenizer.checkObjectStart(lineTokenizer);
                if (header != null) {
                    final int objNumber = header[0];
                    final int generation = header[1];
                    // An entry already registered with a higher generation wins.
                    final boolean superseded = table.get(objNumber) != null
                            && table.get(objNumber).getGenNumber() > generation;
                    if (!superseded) {
                        table.add(new PdfIndirectReference(pdfDocument, objNumber, generation, lineStart));
                    }
                }
            }
        }
        // The document has been scanned completely, so the references used by the trailer
        // are no longer in READING state and the trailer can be assigned now.
        setTrailerFromTrailerIndex(trailerOffset);
    }
}
/**
 * Tries to read the object at the current tokenizer position as a dictionary and checks
 * whether it has a {@code Root} entry.
 *
 * @return {@code true} if a dictionary with a {@code Root} entry was read; {@code false}
 *         otherwise, including when reading or casting fails with any exception
 */
private boolean isCurrentObjectATrailer() {
    try {
        final PdfObject candidate = readObject(false);
        return ((PdfDictionary) candidate).get(PdfName.Root, false) != null;
    } catch (Exception ignored) {
        // Best effort: anything that cannot be read as a dictionary is not a trailer.
        return false;
    }
}
/**
 * Reads the dictionary located at the given position and, if it has a {@code Root} entry,
 * stores it in {@code trailer}.
 *
 * @param trailerIndex position of the trailer dictionary, or {@code null} if none was found
 * @throws IOException if reading from the underlying tokenizer fails
 * @throws PdfException if {@code trailerIndex} is {@code null} or no trailer is set afterwards
 */
private void setTrailerFromTrailerIndex(Long trailerIndex) throws IOException {
    if (trailerIndex == null) {
        throw new PdfException(KernelExceptionMessageConstant.TRAILER_NOT_FOUND);
    }
    tokens.seek(trailerIndex.longValue());
    final PdfDictionary candidate = (PdfDictionary) readObject(false);
    final boolean hasRoot = candidate.get(PdfName.Root, false) != null;
    if (hasRoot) {
        trailer = candidate;
    } else if (trailer == null) {
        throw new PdfException(KernelExceptionMessageConstant.TRAILER_NOT_FOUND);
    }
}
/**
 * Extracts the numeric value of a {@code Prev} entry.
 *
 * <p>
 * A direct number is returned as is. An indirect reference is resolved and accepted only when
 * {@code CONSERVATIVE} is stricter than the reader's strictness level and the referenced
 * object is a number.
 *
 * @param prevObjectToCheck the raw {@code Prev} value, may be {@code null}
 * @return the number, or {@code null} if {@code prevObjectToCheck} is {@code null}
 * @throws InvalidXRefPrevException if the value cannot be accepted as a number
 */
protected PdfNumber getXrefPrev(PdfObject prevObjectToCheck) {
    if (prevObjectToCheck == null) {
        return null;
    }
    if (prevObjectToCheck.getType() == PdfObject.NUMBER) {
        return (PdfNumber) prevObjectToCheck;
    }
    final boolean resolvableReference = prevObjectToCheck.getType() == PdfObject.INDIRECT_REFERENCE
            && StrictnessLevel.CONSERVATIVE.isStricter(this.getStrictnessLevel());
    if (resolvableReference) {
        final PdfObject target = ((PdfIndirectReference) prevObjectToCheck).getRefersTo(true);
        if (target != null && target.getType() == PdfObject.NUMBER) {
            return (PdfNumber) target;
        }
    }
    throw new InvalidXRefPrevException(
            KernelExceptionMessageConstant.XREF_PREV_SHALL_BE_DIRECT_NUMBER_OBJECT);
}
/**
 * Returns the current value of the {@code memorySavingMode} flag.
 *
 * @return the value of {@code memorySavingMode}
 */
boolean isMemorySavingMode() {
    return this.memorySavingMode;
}
/**
 * Sets the callback that {@code processXref(PdfXrefTable)} invokes after xref data was read.
 *
 * @param xrefProcessor the {@link XrefProcessor} to use from now on
 */
void setXrefProcessor(XrefProcessor xrefProcessor) {
this.xrefProcessor = xrefProcessor;
}
/**
 * Reports an unexpected token met while reading an array.
 *
 * <p>
 * When {@code CONSERVATIVE} is stricter than the reader's strictness level the problem is only
 * logged as an error; otherwise it is raised through {@code tokens.throwError}.
 */
private void processArrayReadError() {
    final String offendingToken = new String(tokens.getByteContent(), StandardCharsets.UTF_8);
    final String error = MessageFormatUtil.format(KernelExceptionMessageConstant.UNEXPECTED_TOKEN,
            offendingToken);
    if (!StrictnessLevel.CONSERVATIVE.isStricter(this.getStrictnessLevel())) {
        tokens.throwError(error);
        return;
    }
    LoggerFactory.getLogger(PdfReader.class).error(error);
}
/**
 * Initializes decryption from the trailer's {@code Encrypt} dictionary, if there is one.
 *
 * <p>
 * Does nothing when the reader is already flagged as encrypted or when the trailer has no
 * {@code Encrypt} dictionary. Otherwise the {@code encrypted} flag is set and {@code decrypt}
 * is created according to the {@code Filter} name of that dictionary.
 *
 * @throws PdfException if the {@code Adobe.PubSec} handler is used but no certificate was provided
 * @throws UnsupportedSecurityHandlerException if the filter is neither {@code Standard}
 *                                             nor {@code Adobe.PubSec}
 */
private void readDecryptObj() {
    if (encrypted) {
        return;
    }
    final PdfDictionary encryptDictionary = trailer.getAsDictionary(PdfName.Encrypt);
    if (encryptDictionary == null) {
        return;
    }
    encrypted = true;
    final PdfName handler = encryptDictionary.getAsName(PdfName.Filter);
    if (PdfName.Standard.equals(handler)) {
        // Password based security handler.
        decrypt = new PdfEncryption(encryptDictionary, properties.password, getOriginalFileId());
        return;
    }
    if (!PdfName.Adobe_PubSec.equals(handler)) {
        throw new UnsupportedSecurityHandlerException(MessageFormatUtil.format(KernelExceptionMessageConstant.UNSUPPORTED_SECURITY_HANDLER, handler));
    }
    // Public key security handler: a certificate is mandatory.
    if (properties.certificate == null) {
        throw new PdfException(
                KernelExceptionMessageConstant.CERTIFICATE_IS_NOT_PROVIDED_DOCUMENT_IS_ENCRYPTED_WITH_PUBLIC_KEY_CERTIFICATE);
    }
    decrypt = new PdfEncryption(encryptDictionary, properties.certificateKey, properties.certificate,
            properties.certificateKeyProvider, properties.externalDecryptionProcess);
}
/**
 * Reads the object an indirect reference points to.
 *
 * <p>
 * An already resolved reference returns its cached object. A reference with a positive object
 * stream number triggers reading of that object stream. A reference with a positive offset is
 * read from that offset after its {@code obj} header was verified; if that fails with a
 * runtime exception, {@code fixXref} is set and the reference is not inside an object stream,
 * the xref offsets are fixed and the read is retried once.
 *
 * @param reference the reference to resolve, may be {@code null}
 * @param fixXref whether a failed read may trigger {@code fixXref()} and a retry
 * @return the object read, or {@code null} if there is nothing to read
 * @throws PdfException if an {@link IOException} occurs while reading
 */
private PdfObject readObject(PdfIndirectReference reference, boolean fixXref) {
    if (reference == null) {
        return null;
    }
    if (reference.refersTo != null) {
        return reference.refersTo;
    }
    try {
        currentIndirectReference = reference;
        if (reference.getObjStreamNumber() > 0) {
            final PdfStream container = (PdfStream) pdfDocument.getXref().
                    get(reference.getObjStreamNumber()).getRefersTo(false);
            readObjectStream(container);
            return reference.refersTo;
        }
        if (reference.getOffset() <= 0) {
            return null;
        }
        PdfObject parsed;
        try {
            tokens.seek(reference.getOffset());
            tokens.nextValidToken();
            final boolean headerMatches = tokens.getTokenType() == PdfTokenizer.TokenType.Obj
                    && tokens.getObjNr() == reference.getObjNumber()
                    && tokens.getGenNr() == reference.getGenNumber();
            if (!headerMatches) {
                tokens.throwError(
                        KernelExceptionMessageConstant.INVALID_OFFSET_FOR_THIS_OBJECT, reference.toString());
            }
            parsed = readObject(false);
        } catch (RuntimeException ex) {
            if (!fixXref || reference.getObjStreamNumber() != 0) {
                throw ex;
            }
            // Repair the offsets and retry, this time without another repair attempt.
            fixXref();
            parsed = readObject(reference, false);
        }
        if (parsed == null) {
            return null;
        }
        return parsed.setIndirectReference(reference);
    } catch (IOException e) {
        throw new PdfException(KernelExceptionMessageConstant.CANNOT_READ_PDF_OBJECT, e);
    }
}
/**
 * Verifies the {@code Length} entry of a stream against the file content and recalculates it
 * when it looks wrong. Does nothing when {@code correctStreamLength} is {@code false}.
 *
 * <p>
 * The declared length is trusted only if the 20 characters read right after the declared end
 * of the data start with one of the {@code endstream1..4} constants (defined elsewhere in this
 * class; presumably variants of the {@code endstream} keyword - confirm against their
 * declarations). Otherwise the data is scanned line by line from the stream start until a line
 * starting with {@code endstream} or {@code endobj} is found.
 *
 * @param pdfStream the stream whose length is checked; its {@code Length} entry and cached
 *                  length are updated when a recalculation takes place
 * @throws IOException if reading from the underlying tokenizer fails
 */
private void checkPdfStreamLength(PdfStream pdfStream) throws IOException {
if (!correctStreamLength)
return;
long fileLength = tokens.length();
long start = pdfStream.getOffset();
// set to true when the length has to be recalculated by scanning the file
boolean calc = false;
int streamLength = 0;
PdfNumber pdfNumber = pdfStream.getAsNumber(PdfName.Length);
if (pdfNumber != null) {
streamLength = pdfNumber.intValue();
// the declared end must leave at least 20 bytes before the end of the file
if (streamLength + start > fileLength - 20) {
calc = true;
} else {
// look at what follows the declared end of the stream data
tokens.seek(start + streamLength);
String line = tokens.readString(20);
if (!line.startsWith(endstream2) && !line.startsWith(endstream3) &&
!line.startsWith(endstream4) && !line.startsWith(endstream1)) {
calc = true;
}
}
} else {
// no Length entry at all: add a placeholder that is updated below
pdfNumber = new PdfNumber(0);
pdfStream.put(PdfName.Length, pdfNumber);
calc = true;
}
if (calc) {
ByteBuffer line = new ByteBuffer(16);
tokens.seek(start);
// position where the line currently inspected starts
long pos;
while (true) {
pos = tokens.getPosition();
line.reset();
// added boolean because of mailing list issue (17 Feb. 2014)
if (!tokens.readLineSegment(line, false)) {
// no more lines: fail unless CONSERVATIVE is stricter than the configured level
if (!StrictnessLevel.CONSERVATIVE.isStricter(this.strictnessLevel)) {
throw new PdfException(KernelExceptionMessageConstant.STREAM_SHALL_END_WITH_ENDSTREAM);
}
break;
}
if (line.startsWith(endstream)) {
break;
} else if (line.startsWith(endobj)) {
// endobj was reached first: search the 16 characters before this line
// for endstream1 and, if found, move the end position back to it
tokens.seek(pos - 16);
String s = tokens.readString(16);
int index = s.indexOf(endstream1);
if (index >= 0)
pos = pos - 16 + index;
break;
}
}
streamLength = (int) (pos - start);
// exclude a CR (13) two bytes before pos and an LF (10) right before pos from the length
tokens.seek(pos - 2);
if (tokens.read() == 13) {
streamLength--;
}
tokens.seek(pos - 1);
if (tokens.read() == 10) {
streamLength--;
}
pdfNumber.setValue(streamLength);
pdfStream.updateLength(streamLength);
}
}
/**
 * Provides a {@link PdfNull} for a {@code null} token.
 *
 * @param readAsDirect when {@code true} the shared {@code PdfNull.PDF_NULL} constant is
 *                     returned, otherwise a new instance is created
 * @return the null object
 */
private PdfObject createPdfNullInstance(boolean readAsDirect) {
    return readAsDirect ? PdfNull.PDF_NULL : new PdfNull();
}
/**
 * Creates a tokeniser for the given byte source, skipping any junk bytes that precede the PDF
 * header. When the header offset is zero the tokeniser reads the source as it is; otherwise it
 * reads a window of the source that starts at the header offset.
 *
 * @param byteSource the source to check
 * @param closeStream whether the tokeniser created for the check is closed when the header
 *                    offset cannot be determined
 * @return a tokeniser that is guaranteed to start at the PDF header
 * @throws IOException if there is a problem reading the byte source
 */
private static PdfTokenizer getOffsetTokeniser(IRandomAccessSource byteSource, boolean closeStream)
        throws IOException {
    final PdfTokenizer plainTokenizer = new PdfTokenizer(new RandomAccessFileOrArray(byteSource));
    int headerOffset;
    try {
        headerOffset = plainTokenizer.getHeaderOffset();
    } catch (com.itextpdf.io.exceptions.IOException ex) {
        if (closeStream) {
            plainTokenizer.close();
        }
        throw ex;
    }
    if (headerOffset == 0) {
        return plainTokenizer;
    }
    final IRandomAccessSource windowedSource = new WindowRandomAccessSource(byteSource, headerOffset);
    return new PdfTokenizer(new RandomAccessFileOrArray(windowedSource));
}
/**
 * Passes the xref table and the tokenizer to the configured {@code xrefProcessor} and restores
 * the tokenizer position afterwards, even if the processor throws.
 *
 * @param xrefTable the xref table to hand over to the processor
 * @throws IOException if the processor or the repositioning of the tokenizer fails
 */
private void processXref(PdfXrefTable xrefTable) throws IOException {
    final long resumePosition = tokens.getPosition();
    try {
        xrefProcessor.processXref(xrefTable, tokens);
    } finally {
        tokens.seek(resumePosition);
    }
}
/**
 * An {@link IRandomAccessSource} view over a {@link ByteBuffer} that is not copied: the source
 * always reflects the current content and size of the buffer, so the same buffer can be reset
 * and refilled between reads. After {@link #close()} every read operation fails with an
 * {@link IllegalStateException}.
 */
protected static class ReusableRandomAccessSource implements IRandomAccessSource {
    // Set to null by close(); a null buffer marks the source as closed.
    private ByteBuffer buffer;

    /**
     * Creates a source backed by the given buffer.
     *
     * @param buffer the buffer to read from, must not be {@code null}
     * @throws IllegalArgumentException if {@code buffer} is {@code null}
     */
    public ReusableRandomAccessSource(ByteBuffer buffer) {
        if (buffer == null) throw new IllegalArgumentException("Passed byte buffer can not be null.");
        this.buffer = buffer;
    }

    /**
     * Reads a single byte.
     *
     * @param offset position of the byte in the buffer
     * @return the byte as an unsigned value, or {@code -1} if {@code offset} is at or past the
     *         current size of the buffer
     * @throws IllegalStateException if the source was closed
     */
    @Override
    public int get(long offset) {
        if (buffer == null) throw new IllegalStateException("Already closed");
        if (offset >= buffer.size()) return -1;
        return 0xff & buffer.getInternalBuffer()[(int) offset];
    }

    /**
     * Reads up to {@code len} bytes into {@code bytes}.
     *
     * @param offset position in the buffer to start reading from
     * @param bytes destination array
     * @param off start index in the destination array
     * @param len maximum number of bytes to read
     * @return the number of bytes copied, or {@code -1} if {@code offset} is at or past the
     *         current size of the buffer
     * @throws IllegalStateException if the source was closed
     */
    @Override
    public int get(long offset, byte[] bytes, int off, int len) {
        if (buffer == null) throw new IllegalStateException("Already closed");
        if (offset >= buffer.size())
            return -1;
        // Clamp the request to the bytes that are actually available.
        if (offset + len > buffer.size())
            len = (int) (buffer.size() - offset);
        System.arraycopy(buffer.getInternalBuffer(), (int) offset, bytes, off, len);
        return len;
    }

    /**
     * Returns the current size of the backing buffer.
     *
     * @return the number of bytes currently held by the buffer
     * @throws IllegalStateException if the source was closed
     */
    @Override
    public long length() {
        if (buffer == null) throw new IllegalStateException("Already closed");
        return buffer.size();
    }

    /**
     * Releases the reference to the backing buffer and marks the source as closed.
     */
    @Override
    public void close() {
        buffer = null;
    }
}
/**
 * Enumeration of the strictness levels that can be applied while reading a document.
 */
public enum StrictnessLevel {
    /**
     * The reading strictness level at which iText fails (throws an exception) in case of
     * contradiction with PDF specification, but still recovers from mild parsing errors
     * and ambiguities.
     */
    CONSERVATIVE(5000),
    /**
     * The reading strictness level at which iText tries to recover from parsing
     * errors if possible.
     */
    LENIENT(3000);

    // Numeric rank of the level: a greater value means a stricter level.
    private final int levelValue;

    StrictnessLevel(int rank) {
        levelValue = rank;
    }

    /**
     * Tells whether this level is strictly stricter than the given one. A {@code null}
     * argument is treated as less strict than every level.
     *
     * @param compareWith the {@link StrictnessLevel} to compare with
     *
     * @return {@code true} if the current level is stricter than the provided one
     */
    public boolean isStricter(StrictnessLevel compareWith) {
        if (compareWith == null) {
            return true;
        }
        return Integer.compare(levelValue, compareWith.levelValue) > 0;
    }
}
/**
 * Class containing a callback which is called on every xref table reading.
 * The default implementation does nothing; a different behavior can be supplied by
 * overriding {@link #processXref(PdfXrefTable, PdfTokenizer)}.
 */
static class XrefProcessor {
/**
 * Process xref table.
 *
 * @param xrefTable {@link PdfXrefTable} to be processed
 * @param tokenizer {@link PdfTokenizer} to be processed
 *
 * @throws IOException in case of input-output related exceptions during PDF document reading
 */
void processXref(PdfXrefTable xrefTable, PdfTokenizer tokenizer) throws IOException {
// Do nothing.
}
}
}