All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.itextpdf.kernel.pdf.PdfXrefTable Maven / Gradle / Ivy

There is a newer version: 9.0.0
Show newest version
/*
    This file is part of the iText (R) project.
    Copyright (c) 1998-2024 Apryse Group NV
    Authors: Apryse Software.

    This program is offered under a commercial and under the AGPL license.
    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.

    AGPL licensing:
    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */
package com.itextpdf.kernel.pdf;

import com.itextpdf.commons.actions.data.ProductData;
import com.itextpdf.commons.utils.MessageFormatUtil;
import com.itextpdf.io.logs.IoLogMessageConstant;
import com.itextpdf.io.source.ByteUtils;
import com.itextpdf.kernel.actions.data.ITextCoreProductData;
import com.itextpdf.kernel.exceptions.KernelExceptionMessageConstant;
import com.itextpdf.kernel.exceptions.PdfException;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.TreeSet;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A representation of a cross-referenced table of a PDF document.
 */
public class PdfXrefTable {


    private static final int INITIAL_CAPACITY = 32;
    private static final int MAX_GENERATION = 65535;

    /**
     * The maximum offset in a cross-reference stream. This is a limitation of the PDF specification.
     * SPEC1.7: 7.5.4 Cross reference trailer
     * 

* * It states that the offset should be a 10-digit byte, so the maximum value is 9999999999. * This is the max value that can be represented in 10 bytes. */ private static final long MAX_OFFSET_IN_CROSS_REFERENCE_STREAM = 9_999_999_999L; private static final byte[] freeXRefEntry = ByteUtils.getIsoBytes("f \n"); private static final byte[] inUseXRefEntry = ByteUtils.getIsoBytes("n \n"); private PdfIndirectReference[] xref; private int count = 0; private boolean readingCompleted; private MemoryLimitsAwareHandler memoryLimitsAwareHandler; /** * Free references linked list is stored in a form of a map, where: * key - free reference obj number; * value - previous item in the linked list of free references for the object denoted by the key. */ private final TreeMap freeReferencesLinkedList; /** * Creates a {@link PdfXrefTable} which will be used to store xref structure of the pdf document. * Capacity and {@link MemoryLimitsAwareHandler} instance would be set by default values. */ public PdfXrefTable() { this(INITIAL_CAPACITY); } /** * Creates a {@link PdfXrefTable} which will be used to store xref structure of the pdf document. * * @param capacity initial capacity of xref table. */ public PdfXrefTable(int capacity) { this(capacity, null); } /** * Creates a {@link PdfXrefTable} which will be used to store xref structure of the pdf document. * * @param memoryLimitsAwareHandler custom {@link MemoryLimitsAwareHandler} to set. */ public PdfXrefTable(MemoryLimitsAwareHandler memoryLimitsAwareHandler) { this(INITIAL_CAPACITY, memoryLimitsAwareHandler); } /** * Creates a {@link PdfXrefTable} which will be used to store xref structure of the pdf document. * * @param capacity initial capacity of xref table. * @param memoryLimitsAwareHandler memoryLimitsAwareHandler custom {@link MemoryLimitsAwareHandler} to set. */ public PdfXrefTable(int capacity, MemoryLimitsAwareHandler memoryLimitsAwareHandler) { if (capacity < 1) { capacity = memoryLimitsAwareHandler == null ? 
INITIAL_CAPACITY : Math.min(INITIAL_CAPACITY, memoryLimitsAwareHandler.getMaxNumberOfElementsInXrefStructure()); } this.memoryLimitsAwareHandler = memoryLimitsAwareHandler; if (this.memoryLimitsAwareHandler != null) { this.memoryLimitsAwareHandler.checkIfXrefStructureExceedsTheLimit(capacity); } this.xref = new PdfIndirectReference[capacity]; this.freeReferencesLinkedList = new TreeMap<>(); add((PdfIndirectReference) new PdfIndirectReference(null, 0, MAX_GENERATION, 0).setState(PdfObject.FREE)); } /** * Sets custom {@link MemoryLimitsAwareHandler}. * * @param memoryLimitsAwareHandler instance to set. */ public void setMemoryLimitsAwareHandler(MemoryLimitsAwareHandler memoryLimitsAwareHandler) { this.memoryLimitsAwareHandler = memoryLimitsAwareHandler; } /** * Adds indirect reference to list of indirect objects. * * @param reference indirect reference to add. * @return reference from param */ public PdfIndirectReference add(PdfIndirectReference reference) { if (reference == null) { return null; } int objNr = reference.getObjNumber(); this.count = Math.max(this.count, objNr); ensureCount(objNr); xref[objNr] = reference; return reference; } /** * Get size of cross-reference table. * * @return amount of lines including zero-object */ public int size() { return count + 1; } /** * Calculates a number of stored references to indirect objects. * * @return number of indirect objects */ public int getCountOfIndirectObjects() { int countOfIndirectObjects = 0; for (final PdfIndirectReference ref: xref) { if (ref != null && ! ref.isFree()) { countOfIndirectObjects++; } } return countOfIndirectObjects; } /** * Get appropriate reference to indirect object. * * @param index is the index of required object * @return reference to object with the provided index */ public PdfIndirectReference get(int index) { if (index > count) { return null; } return xref[index]; } /** * Convenience method to write the fingerprint preceding the trailer. 
* The fingerprint contains information on iText products used in the generation or manipulation * of an outputted PDF file. * * @param document pdfDocument to write the fingerprint to */ protected static void writeKeyInfo(PdfDocument document) { PdfWriter writer = document.getWriter(); final Collection products = document.getFingerPrint().getProducts(); if (products.isEmpty()) { writer.writeString(MessageFormatUtil .format("%iText-{0}-no-registered-products\n", ITextCoreProductData.getInstance().getVersion())); } else { for (ProductData productData : products) { writer.writeString(MessageFormatUtil .format("%iText-{0}-{1}\n", productData.getPublicProductName(), productData.getVersion())); } } } /** * Creates next available indirect reference. * * @param document is the current {@link PdfDocument document} * @return created indirect reference. */ protected PdfIndirectReference createNextIndirectReference(PdfDocument document) { PdfIndirectReference reference = new PdfIndirectReference(document, ++count); add(reference); return (PdfIndirectReference) reference.setState(PdfObject.MODIFIED); } /** * Set the reference to free state. * * @param reference is a reference to be updated. */ protected void freeReference(PdfIndirectReference reference) { if (reference.isFree()) { return; } if (reference.checkState(PdfObject.MUST_BE_FLUSHED)) { Logger logger = LoggerFactory.getLogger(PdfXrefTable.class); logger.error(IoLogMessageConstant.INDIRECT_REFERENCE_USED_IN_FLUSHED_OBJECT_MADE_FREE); return; } if (reference.checkState(PdfObject.FLUSHED)) { Logger logger = LoggerFactory.getLogger(PdfXrefTable.class); logger.error(IoLogMessageConstant.ALREADY_FLUSHED_INDIRECT_OBJECT_MADE_FREE); return; } reference.setState(PdfObject.FREE).setState(PdfObject.MODIFIED); appendNewRefToFreeList(reference); if (reference.getGenNumber() < MAX_GENERATION) { reference.genNr++; } } /** * Gets the capacity of xref stream. * * @return the capacity of xref stream. 
*/ protected int getCapacity() { return xref.length; } /** * Increase capacity of the array of indirect references. * * @param capacity is a new capacity to set */ protected void setCapacity(int capacity) { if (capacity > xref.length) { extendXref(capacity); } } /** * Writes cross reference table and trailer to PDF. * * @param document is the current {@link PdfDocument document} * @param fileId field id * @param crypto pdf encryption * @throws IOException if any I/O error occurs */ protected void writeXrefTableAndTrailer(PdfDocument document, PdfObject fileId, PdfObject crypto) throws IOException { PdfWriter writer = document.getWriter(); if (!document.properties.appendMode) { for (int i = count; i > 0; --i) { PdfIndirectReference lastRef = xref[i]; if (lastRef == null || lastRef.isFree()) { removeFreeRefFromList(i); --count; } else { break; } } } PdfStream xrefStream = null; if (writer.isFullCompression()) { xrefStream = new PdfStream(); xrefStream.makeIndirect(document); } List sections = createSections(document, false); boolean noModifiedObjects = (sections.size() == 0) || (xrefStream != null && sections.size() == 2 && sections.get(0) == count && sections.get(1) == 1); if (document.properties.appendMode && noModifiedObjects) { // No modifications in document xref = null; return; } document.checkIsoConformance(this, IsoKey.XREF_TABLE); long startxref = writer.getCurrentPos(); long xRefStmPos = -1; if (xrefStream != null) { xrefStream.put(PdfName.Type, PdfName.XRef); xrefStream.put(PdfName.ID, fileId); if (crypto != null) xrefStream.put(PdfName.Encrypt, crypto); xrefStream.put(PdfName.Size, new PdfNumber(this.size())); int offsetSize = getOffsetSize(Math.max(startxref, size())); xrefStream.put(PdfName.W, new PdfArray( Arrays.asList((PdfObject) new PdfNumber(1), new PdfNumber(offsetSize), new PdfNumber(2)))); xrefStream.put(PdfName.Info, document.getDocumentInfo().getPdfObject()); xrefStream.put(PdfName.Root, document.getCatalog().getPdfObject()); PdfArray index = 
new PdfArray(); for (Integer section : sections) { index.add(new PdfNumber((int) section)); } if (document.properties.appendMode && !document.reader.hybridXref) { // "not meaningful in hybrid-reference files" PdfNumber lastXref = new PdfNumber(document.reader.getLastXref()); xrefStream.put(PdfName.Prev, lastXref); } xrefStream.put(PdfName.Index, index); xrefStream.getIndirectReference().setOffset(startxref); PdfXrefTable xrefTable = document.getXref(); for (int k = 0; k < sections.size(); k += 2) { int first = (int) sections.get(k); int len = (int) sections.get(k + 1); for (int i = first; i < first + len; i++) { PdfIndirectReference reference = xrefTable.get(i); if (reference.isFree()) { xrefStream.getOutputStream().write(0); xrefStream.getOutputStream().write(reference.getOffset(), offsetSize); xrefStream.getOutputStream().write(reference.getGenNumber(), 2); } else if (reference.getObjStreamNumber() == 0) { xrefStream.getOutputStream().write(1); xrefStream.getOutputStream().write(reference.getOffset(), offsetSize); xrefStream.getOutputStream().write(reference.getGenNumber(), 2); } else { xrefStream.getOutputStream().write(2); xrefStream.getOutputStream().write(reference.getObjStreamNumber(), offsetSize); xrefStream.getOutputStream().write(reference.getIndex(), 2); } } } xrefStream.flush(); xRefStmPos = startxref; } // For documents with hybrid cross-reference table, i.e. containing xref streams as well as regular xref sections, // we write additional regular xref section at the end of the document because the /Prev reference from // xref stream to a regular xref section doesn't seem to be valid boolean needsRegularXref = !writer.isFullCompression() || (document.properties.appendMode && document.reader.hybridXref); if (needsRegularXref) { startxref = writer.getCurrentPos(); writer.writeString("xref\n"); PdfXrefTable xrefTable = document.getXref(); if (xRefStmPos != -1) { // Get rid of all objects from object stream. 
This is done for hybrid documents sections = createSections(document, true); } for (int k = 0; k < sections.size(); k += 2) { int first = (int) sections.get(k); int len = (int) sections.get(k + 1); writer.writeInteger(first).writeSpace().writeInteger(len).writeByte((byte) '\n'); for (int i = first; i < first + len; i++) { PdfIndirectReference reference = xrefTable.get(i); if (reference.getOffset() > MAX_OFFSET_IN_CROSS_REFERENCE_STREAM) { throw new PdfException(KernelExceptionMessageConstant.XREF_HAS_AN_ENTRY_WITH_TOO_BIG_OFFSET); } StringBuilder off = new StringBuilder("0000000000").append(reference.getOffset()); StringBuilder gen = new StringBuilder("00000").append(reference.getGenNumber()); writer.writeString(off.substring(off.length() - 10, off.length())).writeSpace(). writeString(gen.substring(gen.length() - 5, gen.length())).writeSpace(); if (reference.isFree()) { writer.writeBytes(freeXRefEntry); } else { writer.writeBytes(inUseXRefEntry); } } } PdfDictionary trailer = document.getTrailer(); // Remove all unused keys in case stamp mode in case original file has full compression, but destination file has not. trailer.remove(PdfName.W); trailer.remove(PdfName.Index); trailer.remove(PdfName.Type); trailer.remove(PdfName.Length); trailer.put(PdfName.Size, new PdfNumber(this.size())); trailer.put(PdfName.ID, fileId); if (xRefStmPos != -1) { trailer.put(PdfName.XRefStm, new PdfNumber(xRefStmPos)); } if (crypto != null) trailer.put(PdfName.Encrypt, crypto); writer.writeString("trailer\n"); if (document.properties.appendMode) { PdfNumber lastXref = new PdfNumber(document.reader.getLastXref()); trailer.put(PdfName.Prev, lastXref); } writer.write(document.getTrailer()); writer.write('\n'); } writeKeyInfo(document); writer.writeString("startxref\n"). writeLong(startxref). writeString("\n%%EOF\n"); xref = null; freeReferencesLinkedList.clear(); } /** * Change the state of the cross-reference table to mark that reading of the document * was completed. 
*/ void markReadingCompleted() { readingCompleted = true; } /** * Change the state of the cross-reference table to unmark that reading of the document * was completed. */ void unmarkReadingCompleted() { readingCompleted = false; } /** * Check if reading of the document was completed. * * @return true if reading was completed and false otherwise */ boolean isReadingCompleted() { return readingCompleted; } /** * Set up appropriate state for the free references list. * * @param pdfDocument is the current {@link PdfDocument document} */ void initFreeReferencesList(PdfDocument pdfDocument) { freeReferencesLinkedList.clear(); // ensure zero object is free xref[0].setState(PdfObject.FREE); TreeSet freeReferences = new TreeSet<>(); for (int i = 1; i < size() && i < xref.length; ++i) { PdfIndirectReference ref = xref[i]; if (ref == null || ref.isFree()) { freeReferences.add(i); } } PdfIndirectReference prevFreeRef = xref[0]; while (!freeReferences.isEmpty()) { int currFreeRefObjNr = -1; if (prevFreeRef.getOffset() <= Integer.MAX_VALUE) { currFreeRefObjNr = (int) prevFreeRef.getOffset(); } if (!freeReferences.contains(currFreeRefObjNr) || xref[currFreeRefObjNr] == null) { break; } freeReferencesLinkedList.put(currFreeRefObjNr, prevFreeRef); prevFreeRef = xref[currFreeRefObjNr]; freeReferences.remove(currFreeRefObjNr); } while (!freeReferences.isEmpty()) { int next = freeReferences.pollFirst(); if (xref[next] == null) { if (pdfDocument.properties.appendMode) { continue; } xref[next] = (PdfIndirectReference) new PdfIndirectReference(pdfDocument, next, 0).setState(PdfObject.FREE).setState(PdfObject.MODIFIED); } else if (xref[next].getGenNumber() == MAX_GENERATION && xref[next].getOffset() == 0) { continue; } if (prevFreeRef.getOffset() != (long)next) { ((PdfIndirectReference) prevFreeRef.setState(PdfObject.MODIFIED)).setOffset(next); } freeReferencesLinkedList.put(next, prevFreeRef); prevFreeRef = xref[next]; } if (prevFreeRef.getOffset() != 0) { ((PdfIndirectReference) 
prevFreeRef.setState(PdfObject.MODIFIED)).setOffset(0); } freeReferencesLinkedList.put(0, prevFreeRef); } /** * Method is used for object streams to avoid reuse existed references. * * @param document is the current {@link PdfDocument document} * @return created indirect reference to the object stream */ PdfIndirectReference createNewIndirectReference(PdfDocument document) { PdfIndirectReference reference = new PdfIndirectReference(document, ++count); add(reference); return (PdfIndirectReference) reference.setState(PdfObject.MODIFIED); } /** * Clear the state of the cross-reference table without free references removal. */ void clear() { for (int i = 1; i <= count; i++) { if (xref[i] != null && xref[i].isFree()) { continue; } xref[i] = null; } count = 1; } /** * Clear the state of the cross-reference table including free references. */ void clearAllReferences() { for (int i = 1; i <= count; i++) { xref[i] = null; } count = 1; } private List createSections(PdfDocument document, boolean dropObjectsFromObjectStream) { List sections = new ArrayList<>(); int first = 0; int len = 0; for (int i = 0; i < size(); i++) { PdfIndirectReference reference = xref[i]; if (document.properties.appendMode && reference != null && (!reference.checkState(PdfObject.MODIFIED) || (dropObjectsFromObjectStream && reference.getObjStreamNumber() != 0))) { reference = null; } if (reference == null) { if (len > 0) { sections.add(first); sections.add(len); } len = 0; } else { if (len > 0) { len++; } else { first = i; len = 1; } } } if (len > 0) { sections.add(first); sections.add(len); } return sections; } /** * Gets size of the offset. Max size is 2^40, i.e. 1 Tb. */ private int getOffsetSize(long startxref) { assert startxref >= 0 && startxref < (1L << 40); //initial size = 5 bytes. It is 1 Tb. Shall be enough. 
int size = 5; long mask = 0xff00000000L; for (; size > 1; size--) { if ((mask & startxref) != 0) break; // there is no need to use >>> because mask is positive mask >>= 8; } return size; } private void appendNewRefToFreeList(PdfIndirectReference reference) { reference.setOffset(0); if (freeReferencesLinkedList.isEmpty()) { assert false; // free references list is not initialized yet return; } PdfIndirectReference lastFreeRef = freeReferencesLinkedList.get(0); ((PdfIndirectReference) lastFreeRef.setState(PdfObject.MODIFIED)).setOffset(reference.getObjNumber()); freeReferencesLinkedList.put(reference.getObjNumber(), lastFreeRef); freeReferencesLinkedList.put(0, reference); } /** * Removes indirect reference from free references linked list. * It does not removes it from xref table and affects only the linked list formed by offset values of free references. * @param freeRefObjNr object number of the reference to be removed. * Removes the free reference with the least object number if this parameter is less than zero: * this could be used for finding the next free reference for reusing. * @return {@link PdfIndirectReference} instance of the removed free reference corresponding to the object number * passed as parameter. {@code null} - if given object number doesn't correspond to free reference or equals to zero. 
*/ private PdfIndirectReference removeFreeRefFromList(int freeRefObjNr) { if (freeReferencesLinkedList.isEmpty()) { assert false; // free references list is not initialized yet return null; } if (freeRefObjNr == 0) { return null; } if (freeRefObjNr < 0) { Integer leastFreeRefObjNum = null; for (Map.Entry entry : freeReferencesLinkedList.entrySet()) { if (entry.getKey() <= 0 || xref[entry.getKey()].getGenNumber() >= MAX_GENERATION) { continue; } leastFreeRefObjNum = entry.getKey(); break; } if (leastFreeRefObjNum == null) { return null; } freeRefObjNr = (int)leastFreeRefObjNum; } PdfIndirectReference freeRef = xref[freeRefObjNr]; if (!freeRef.isFree()) { return null; } PdfIndirectReference prevFreeRef = freeReferencesLinkedList.remove(freeRef.getObjNumber()); if (prevFreeRef != null) { freeReferencesLinkedList.put((int) freeRef.getOffset(), prevFreeRef); ((PdfIndirectReference) prevFreeRef.setState(PdfObject.MODIFIED)).setOffset(freeRef.getOffset()); } return freeRef; } private void ensureCount(int count) { if (count >= xref.length) { extendXref(count << 1); } } private void extendXref(int capacity) { if (this.memoryLimitsAwareHandler != null) { this.memoryLimitsAwareHandler.checkIfXrefStructureExceedsTheLimit(capacity); } PdfIndirectReference[] newXref = new PdfIndirectReference[capacity]; System.arraycopy(this.xref, 0, newXref, 0, this.xref.length); this.xref = newXref; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy