All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.itextpdf.kernel.pdf.PdfXrefTable Maven / Gradle / Ivy

There is a newer version: 9.0.0
Show newest version
/*
    This file is part of the iText (R) project.
    Copyright (c) 1998-2023 Apryse Group NV
    Authors: Apryse Software.

    This program is offered under a commercial and under the AGPL license.
    For commercial licensing, contact us at https://itextpdf.com/sales.  For AGPL licensing, see below.

    AGPL licensing:
    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU Affero General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU Affero General Public License for more details.

    You should have received a copy of the GNU Affero General Public License
    along with this program.  If not, see .
 */
package com.itextpdf.kernel.pdf;

import com.itextpdf.commons.actions.data.ProductData;
import com.itextpdf.commons.utils.MessageFormatUtil;
import com.itextpdf.io.logs.IoLogMessageConstant;
import com.itextpdf.io.source.ByteUtils;
import com.itextpdf.kernel.actions.data.ITextCoreProductData;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.TreeSet;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A representation of a cross-referenced table of a PDF document.
 */
public class PdfXrefTable {


    private static final int INITIAL_CAPACITY = 32;
    private static final int MAX_GENERATION = 65535;

    private static final byte[] freeXRefEntry = ByteUtils.getIsoBytes("f \n");
    private static final byte[] inUseXRefEntry = ByteUtils.getIsoBytes("n \n");

    private PdfIndirectReference[] xref;
    private int count = 0;
    private boolean readingCompleted;
    private MemoryLimitsAwareHandler memoryLimitsAwareHandler;

    /**
     * Free references linked list is stored in a form of a map, where:
     * key - free reference obj number;
     * value - previous item in the linked list of free references for the object denoted by the key.
     */
    private final TreeMap freeReferencesLinkedList;

    /**
     * Creates a {@link PdfXrefTable} which will be used to store xref structure of the pdf document.
     * Capacity and {@link MemoryLimitsAwareHandler} instance would be set by default values.
     */
    public PdfXrefTable() {
        this(INITIAL_CAPACITY);
    }

    /**
     * Creates a {@link PdfXrefTable} which will be used to store xref structure of the pdf document.
     *
     * @param capacity initial capacity of xref table.
     */
    public PdfXrefTable(int capacity) {
        this(capacity, null);
    }

    /**
     * Creates a {@link PdfXrefTable} which will be used to store xref structure of the pdf document.
     *
     * @param memoryLimitsAwareHandler custom {@link MemoryLimitsAwareHandler} to set.
     */
    public PdfXrefTable(MemoryLimitsAwareHandler memoryLimitsAwareHandler) {
        this(INITIAL_CAPACITY, memoryLimitsAwareHandler);
    }

    /**
     * Creates a {@link PdfXrefTable} which will be used to store xref structure of the pdf document.
     *
     * @param capacity initial capacity of xref table.
     * @param memoryLimitsAwareHandler memoryLimitsAwareHandler custom {@link MemoryLimitsAwareHandler} to set.
     */
    public PdfXrefTable(int capacity, MemoryLimitsAwareHandler memoryLimitsAwareHandler) {
        if (capacity < 1) {
            capacity = memoryLimitsAwareHandler == null ? INITIAL_CAPACITY
                    : Math.min(INITIAL_CAPACITY, memoryLimitsAwareHandler.getMaxNumberOfElementsInXrefStructure());
        }
        this.memoryLimitsAwareHandler = memoryLimitsAwareHandler;
        if (this.memoryLimitsAwareHandler != null) {
            this.memoryLimitsAwareHandler.checkIfXrefStructureExceedsTheLimit(capacity);
        }
        this.xref = new PdfIndirectReference[capacity];
        this.freeReferencesLinkedList = new TreeMap<>();
        add((PdfIndirectReference) new PdfIndirectReference(null, 0, MAX_GENERATION, 0).setState(PdfObject.FREE));
    }

    /**
     * Sets custom {@link MemoryLimitsAwareHandler}.
     *
     * @param memoryLimitsAwareHandler instance to set.
     */
    public void setMemoryLimitsAwareHandler(MemoryLimitsAwareHandler memoryLimitsAwareHandler) {
        this.memoryLimitsAwareHandler = memoryLimitsAwareHandler;
    }

    /**
     * Adds indirect reference to list of indirect objects.
     *
     * @param reference indirect reference to add.
     * @return reference from param
     */
    public PdfIndirectReference add(PdfIndirectReference reference) {
        if (reference == null) {
            return null;
        }
        int objNr = reference.getObjNumber();
        this.count = Math.max(this.count, objNr);
        ensureCount(objNr);
        xref[objNr] = reference;
        return reference;
    }

    /**
     * Get size of cross-reference table.
     *
     * @return amount of lines including zero-object
     */
    public int size() {
        return count + 1;
    }

    /**
     * Calculates a number of stored references to indirect objects.
     *
     * @return number of indirect objects
     */
    public int getCountOfIndirectObjects() {
        int countOfIndirectObjects = 0;

        for (final PdfIndirectReference ref: xref) {
            if (ref != null && ! ref.isFree()) {
                countOfIndirectObjects++;
            }
        }

        return countOfIndirectObjects;
    }

    /**
     * Get appropriate reference to indirect object.
     *
     * @param index is the index of required object
     * @return reference to object with the provided index
     */
    public PdfIndirectReference get(int index) {
        if (index > count) {
            return null;
        }
        return xref[index];
    }

    /**
     * Convenience method to write the fingerprint preceding the trailer.
     * The fingerprint contains information on iText products used in the generation or manipulation
     * of an outputted PDF file.
     *
     * @param document pdfDocument to write the fingerprint to
     */
    protected static void writeKeyInfo(PdfDocument document) {
        PdfWriter writer = document.getWriter();

        final Collection products = document.getFingerPrint().getProducts();
        if (products.isEmpty()) {
            writer.writeString(MessageFormatUtil
                    .format("%iText-{0}-no-registered-products\n", ITextCoreProductData.getInstance().getVersion()));
        } else {
            for (ProductData productData : products) {
                writer.writeString(MessageFormatUtil
                        .format("%iText-{0}-{1}\n", productData.getPublicProductName(), productData.getVersion()));
            }
        }
    }

    /**
     * Creates next available indirect reference.
     *
     * @param document is the current {@link PdfDocument document}
     * @return created indirect reference.
     */
    protected PdfIndirectReference createNextIndirectReference(PdfDocument document) {
        PdfIndirectReference reference = new PdfIndirectReference(document, ++count);
        add(reference);
        return (PdfIndirectReference) reference.setState(PdfObject.MODIFIED);
    }

    /**
     * Set the reference to free state.
     *
     * @param reference is a reference to be updated.
     */
    protected void freeReference(PdfIndirectReference reference) {
        if (reference.isFree()) {
            return;
        }
        if (reference.checkState(PdfObject.MUST_BE_FLUSHED)) {
            Logger logger = LoggerFactory.getLogger(PdfXrefTable.class);
            logger.error(IoLogMessageConstant.INDIRECT_REFERENCE_USED_IN_FLUSHED_OBJECT_MADE_FREE);
            return;
        }
        if (reference.checkState(PdfObject.FLUSHED)) {
            Logger logger = LoggerFactory.getLogger(PdfXrefTable.class);
            logger.error(IoLogMessageConstant.ALREADY_FLUSHED_INDIRECT_OBJECT_MADE_FREE);
            return;
        }

        reference.setState(PdfObject.FREE).setState(PdfObject.MODIFIED);

        appendNewRefToFreeList(reference);

        if (reference.getGenNumber() < MAX_GENERATION) {
            reference.genNr++;
        }

    }

    /**
     * Gets the capacity of xref stream.
     *
     * @return the capacity of xref stream.
     */
    protected int getCapacity() {
        return xref.length;
    }

    /**
     * Increase capacity of the array of indirect references.
     *
     * @param capacity is a new capacity to set
     */
    protected void setCapacity(int capacity) {
        if (capacity > xref.length) {
            extendXref(capacity);
        }
    }

    /**
     * Writes cross reference table and trailer to PDF.
     *
     * @param document is the current {@link PdfDocument document}
     * @param fileId   field id
     * @param crypto   pdf encryption
     * @throws IOException if any I/O error occurs
     */
    protected void writeXrefTableAndTrailer(PdfDocument document, PdfObject fileId, PdfObject crypto) throws IOException {
        PdfWriter writer = document.getWriter();

        if (!document.properties.appendMode) {
            for (int i = count; i > 0; --i) {
                PdfIndirectReference lastRef = xref[i];
                if (lastRef == null || lastRef.isFree()) {
                    removeFreeRefFromList(i);
                    --count;
                } else {
                    break;
                }
            }
        }

        PdfStream xrefStream = null;
        if (writer.isFullCompression()) {
            xrefStream = new PdfStream();
            xrefStream.makeIndirect(document);
        }
        List sections = createSections(document, false);
        boolean noModifiedObjects = (sections.size() == 0) ||
                (xrefStream != null && sections.size() == 2 && sections.get(0) == count && sections.get(1) == 1);
        if (document.properties.appendMode && noModifiedObjects) {
            // No modifications in document
            xref = null;
            return;
        }

        document.checkIsoConformance(this, IsoKey.XREF_TABLE);

        long startxref = writer.getCurrentPos();
        long xRefStmPos = -1;
        if (xrefStream != null) {
            xrefStream.put(PdfName.Type, PdfName.XRef);
            xrefStream.put(PdfName.ID, fileId);
            if (crypto != null)
                xrefStream.put(PdfName.Encrypt, crypto);
            xrefStream.put(PdfName.Size, new PdfNumber(this.size()));

            int offsetSize = getOffsetSize(Math.max(startxref, size()));
            xrefStream.put(PdfName.W, new PdfArray(
                    Arrays.asList((PdfObject) new PdfNumber(1), new PdfNumber(offsetSize), new PdfNumber(2))));
            xrefStream.put(PdfName.Info, document.getDocumentInfo().getPdfObject());
            xrefStream.put(PdfName.Root, document.getCatalog().getPdfObject());
            PdfArray index = new PdfArray();
            for (Integer section : sections) {
                index.add(new PdfNumber((int) section));
            }
            if (document.properties.appendMode && !document.reader.hybridXref) {
                // "not meaningful in hybrid-reference files"
                PdfNumber lastXref = new PdfNumber(document.reader.getLastXref());
                xrefStream.put(PdfName.Prev, lastXref);
            }
            xrefStream.put(PdfName.Index, index);
            xrefStream.getIndirectReference().setOffset(startxref);
            PdfXrefTable xrefTable = document.getXref();
            for (int k = 0; k < sections.size(); k += 2) {
                int first = (int) sections.get(k);
                int len = (int) sections.get(k + 1);
                for (int i = first; i < first + len; i++) {
                    PdfIndirectReference reference = xrefTable.get(i);
                    if (reference.isFree()) {
                        xrefStream.getOutputStream().write(0);
                        xrefStream.getOutputStream().write(reference.getOffset(), offsetSize);
                        xrefStream.getOutputStream().write(reference.getGenNumber(), 2);
                    } else if (reference.getObjStreamNumber() == 0) {
                        xrefStream.getOutputStream().write(1);
                        xrefStream.getOutputStream().write(reference.getOffset(), offsetSize);
                        xrefStream.getOutputStream().write(reference.getGenNumber(), 2);
                    } else {
                        xrefStream.getOutputStream().write(2);
                        xrefStream.getOutputStream().write(reference.getObjStreamNumber(), offsetSize);
                        xrefStream.getOutputStream().write(reference.getIndex(), 2);
                    }
                }
            }
            xrefStream.flush();
            xRefStmPos = startxref;
        }

        // For documents with hybrid cross-reference table, i.e. containing xref streams as well as regular xref sections,
        // we write additional regular xref section at the end of the document because the /Prev reference from
        // xref stream to a regular xref section doesn't seem to be valid
        boolean needsRegularXref = !writer.isFullCompression() || (document.properties.appendMode && document.reader.hybridXref);

        if (needsRegularXref) {
            startxref = writer.getCurrentPos();
            writer.writeString("xref\n");
            PdfXrefTable xrefTable = document.getXref();
            if (xRefStmPos != -1) {
                // Get rid of all objects from object stream. This is done for hybrid documents
                sections = createSections(document, true);
            }
            for (int k = 0; k < sections.size(); k += 2) {
                int first = (int) sections.get(k);
                int len = (int) sections.get(k + 1);
                writer.writeInteger(first).writeSpace().writeInteger(len).writeByte((byte) '\n');
                for (int i = first; i < first + len; i++) {
                    PdfIndirectReference reference = xrefTable.get(i);

                    StringBuilder off = new StringBuilder("0000000000").append(reference.getOffset());
                    StringBuilder gen = new StringBuilder("00000").append(reference.getGenNumber());
                    writer.writeString(off.substring(off.length() - 10, off.length())).writeSpace().
                            writeString(gen.substring(gen.length() - 5, gen.length())).writeSpace();
                    if (reference.isFree()) {
                        writer.writeBytes(freeXRefEntry);
                    } else {
                        writer.writeBytes(inUseXRefEntry);
                    }
                }
            }
            PdfDictionary trailer = document.getTrailer();
            // Remove all unused keys in case stamp mode in case original file has full compression, but destination file has not.
            trailer.remove(PdfName.W);
            trailer.remove(PdfName.Index);
            trailer.remove(PdfName.Type);
            trailer.remove(PdfName.Length);
            trailer.put(PdfName.Size, new PdfNumber(this.size()));
            trailer.put(PdfName.ID, fileId);
            if (xRefStmPos != -1) {
                trailer.put(PdfName.XRefStm, new PdfNumber(xRefStmPos));
            }
            if (crypto != null)
                trailer.put(PdfName.Encrypt, crypto);
            writer.writeString("trailer\n");
            if (document.properties.appendMode) {
                PdfNumber lastXref = new PdfNumber(document.reader.getLastXref());
                trailer.put(PdfName.Prev, lastXref);
            }
            writer.write(document.getTrailer());
            writer.write('\n');
        }
        writeKeyInfo(document);
        writer.writeString("startxref\n").
                writeLong(startxref).
                writeString("\n%%EOF\n");
        xref = null;
        freeReferencesLinkedList.clear();
    }

    /**
     * Change the state of the cross-reference table to mark that reading of the document
     * was completed.
     */
    void markReadingCompleted() {
        readingCompleted = true;
    }

    /**
     * Check if reading of the document was completed.
     *
     * @return true if reading was completed and false otherwise
     */
    boolean isReadingCompleted() {
        return readingCompleted;
    }

    /**
     * Set up appropriate state for the free references list.
     *
     * @param pdfDocument is the current {@link PdfDocument document}
     */
    void initFreeReferencesList(PdfDocument pdfDocument) {
        freeReferencesLinkedList.clear();

        // ensure zero object is free
        xref[0].setState(PdfObject.FREE);
        TreeSet freeReferences = new TreeSet<>();
        for (int i = 1; i < size(); ++i) {
            PdfIndirectReference ref = xref[i];
            if (ref == null || ref.isFree()) {
                freeReferences.add(i);
            }
        }

        PdfIndirectReference prevFreeRef = xref[0];
        while (!freeReferences.isEmpty()) {
            int currFreeRefObjNr = -1;
            if (prevFreeRef.getOffset() <= Integer.MAX_VALUE) {
                currFreeRefObjNr = (int) prevFreeRef.getOffset();
            }
            if (!freeReferences.contains(currFreeRefObjNr) || xref[currFreeRefObjNr] == null) {
                break;
            }

            freeReferencesLinkedList.put(currFreeRefObjNr, prevFreeRef);
            prevFreeRef = xref[currFreeRefObjNr];
            freeReferences.remove(currFreeRefObjNr);
        }

        while (!freeReferences.isEmpty()) {
            int next = freeReferences.pollFirst();
            if (xref[next] == null) {
                if (pdfDocument.properties.appendMode) {
                    continue;
                }
                xref[next] = (PdfIndirectReference) new PdfIndirectReference(pdfDocument, next, 0).setState(PdfObject.FREE).setState(PdfObject.MODIFIED);
            } else if (xref[next].getGenNumber() == MAX_GENERATION && xref[next].getOffset() == 0) {
                continue;
            }
            if (prevFreeRef.getOffset() != (long)next) {
                ((PdfIndirectReference) prevFreeRef.setState(PdfObject.MODIFIED)).setOffset(next);
            }
            freeReferencesLinkedList.put(next, prevFreeRef);
            prevFreeRef = xref[next];
        }

        if (prevFreeRef.getOffset() != 0) {
            ((PdfIndirectReference) prevFreeRef.setState(PdfObject.MODIFIED)).setOffset(0);
        }
        freeReferencesLinkedList.put(0, prevFreeRef);
    }

    /**
     * Method is used for object streams to avoid reuse existed references.
     *
     * @param document is the current {@link PdfDocument document}
     * @return created indirect reference to the object stream
     */
    PdfIndirectReference createNewIndirectReference(PdfDocument document) {
        PdfIndirectReference reference = new PdfIndirectReference(document, ++count);
        add(reference);
        return (PdfIndirectReference) reference.setState(PdfObject.MODIFIED);
    }

    /**
     * Clear the state of the cross-reference table.
     */
    void clear() {
        for (int i = 1; i <= count; i++) {
            if (xref[i] != null && xref[i].isFree()) {
                continue;
            }
            xref[i] = null;
        }
        count = 1;
    }

    private List createSections(PdfDocument document, boolean dropObjectsFromObjectStream) {
        List sections = new ArrayList<>();
        int first = 0;
        int len = 0;
        for (int i = 0; i < size(); i++) {
            PdfIndirectReference reference = xref[i];
            if (document.properties.appendMode && reference != null &&
                    (!reference.checkState(PdfObject.MODIFIED) || (dropObjectsFromObjectStream && reference.getObjStreamNumber() != 0))) {
                reference = null;
            }

            if (reference == null) {
                if (len > 0) {
                    sections.add(first);
                    sections.add(len);
                }
                len = 0;
            } else {
                if (len > 0) {
                    len++;
                } else {
                    first = i;
                    len = 1;
                }
            }
        }
        if (len > 0) {
            sections.add(first);
            sections.add(len);
        }

        return sections;
    }

    /**
     * Gets size of the offset. Max size is 2^40, i.e. 1 Tb.
     */
    private int getOffsetSize(long startxref) {
        assert startxref >= 0 && startxref < (1L << 40);
        //initial size = 5 bytes. It is 1 Tb. Shall be enough.
        int size = 5;
        long mask = 0xff00000000L;
        for (; size > 1; size--) {
            if ((mask & startxref) != 0)
                break;
            // there is no need to use >>> because mask is positive
            mask >>= 8;
        }
        return size;
    }

    private void appendNewRefToFreeList(PdfIndirectReference reference) {
        reference.setOffset(0);
        if (freeReferencesLinkedList.isEmpty()) {
            assert false;
            // free references list is not initialized yet
            return;
        }
        PdfIndirectReference lastFreeRef = freeReferencesLinkedList.get(0);
        ((PdfIndirectReference) lastFreeRef.setState(PdfObject.MODIFIED)).setOffset(reference.getObjNumber());
        freeReferencesLinkedList.put(reference.getObjNumber(), lastFreeRef);
        freeReferencesLinkedList.put(0, reference);
    }

    /**
     * Removes indirect reference from free references linked list.
     * It does not removes it from xref table and affects only the linked list formed by offset values of free references.
     * @param freeRefObjNr object number of the reference to be removed.
     *                     Removes the free reference with the least object number if this parameter is less than zero:
     *                     this could be used for finding the next free reference for reusing.
     * @return {@link PdfIndirectReference} instance of the removed free reference corresponding to the object number
     * passed as parameter. {@code null} - if given object number doesn't correspond to free reference or equals to zero.
     */
    private PdfIndirectReference removeFreeRefFromList(int freeRefObjNr) {
        if (freeReferencesLinkedList.isEmpty()) {
            assert false;
            // free references list is not initialized yet
            return null;
        }
        if (freeRefObjNr == 0) {
            return null;
        }
        if (freeRefObjNr < 0) {
            Integer leastFreeRefObjNum = null;
            for (Map.Entry entry : freeReferencesLinkedList.entrySet()) {
                if (entry.getKey() <= 0 || xref[entry.getKey()].getGenNumber() >= MAX_GENERATION) {
                    continue;
                }
                leastFreeRefObjNum = entry.getKey();
                break;
            }
            if (leastFreeRefObjNum == null) {
                return null;
            }
            freeRefObjNr = (int)leastFreeRefObjNum;
        }

        PdfIndirectReference freeRef = xref[freeRefObjNr];
        if (!freeRef.isFree()) {
            return null;
        }

        PdfIndirectReference prevFreeRef = freeReferencesLinkedList.remove(freeRef.getObjNumber());
        if (prevFreeRef != null) {
            freeReferencesLinkedList.put((int) freeRef.getOffset(), prevFreeRef);
            ((PdfIndirectReference) prevFreeRef.setState(PdfObject.MODIFIED)).setOffset(freeRef.getOffset());
        }

        return freeRef;
    }

    private void ensureCount(int count) {
        if (count >= xref.length) {
            extendXref(count << 1);
        }
    }

    private void extendXref(int capacity) {
        if (this.memoryLimitsAwareHandler != null) {
            this.memoryLimitsAwareHandler.checkIfXrefStructureExceedsTheLimit(capacity);
        }
        PdfIndirectReference[] newXref = new PdfIndirectReference[capacity];
        System.arraycopy(this.xref, 0, newXref, 0, this.xref.length);
        this.xref = newXref;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy