All Downloads are FREE. The search and download functionality uses the official Maven repository.

org.apache.fop.pdf.PDFLinearization Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* $Id$ */
package org.apache.fop.pdf;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.io.output.CountingOutputStream;

public class PDFLinearization {
    private PDFDocument doc;
    private Map> pageObjsMap = new HashMap>();
    private PDFDictionary linearDict;
    private HintTable hintTable;

    public PDFLinearization(PDFDocument doc) {
        this.doc = doc;
    }

    static class HintTable extends PDFStream {
        private List pages;
        int pageStartPos;
        List sharedLengths = new ArrayList();
        List pageLengths = new ArrayList();
        List contentStreamLengths = new ArrayList();
        List objCount = new ArrayList();
        Map hintGroups = new HashMap();

        public HintTable(PDFDocument doc) {
            super(false);
            doc.assignObjectNumber(this);
            doc.addObject(this);
            pages = doc.pageObjs;
            for (int i = 0; i < pages.size(); i++) {
                pageLengths.add(0);
                contentStreamLengths.add(0);
                objCount.add(0);
            }
            hintGroups.put("/C", new int[4]);
            hintGroups.put("/L", new int[4]);
            hintGroups.put("/I", new int[4]);
            hintGroups.put("/E", new int[4]);
            hintGroups.put("/O", new int[4]);
            hintGroups.put("/V", new int[4]);
        }

        @Override
        public PDFFilterList getFilterList() {
            return new PDFFilterList(getDocument().isEncryptionActive());
        }

        @Override
        protected void outputRawStreamData(OutputStream os) throws IOException {
            CountingOutputStream bos = new CountingOutputStream(os);

            //start header
            writeULong(1, bos); //1
            writeULong(pageStartPos, bos); //2
            writeCard16(32, bos); //3
            writeULong(0, bos); //4
            writeCard16(32, bos); //5
            writeULong(0, bos); //6
            writeCard16(0, bos); //7
            writeULong(0, bos); //8
            writeCard16(32, bos); //9
            writeCard16(0, bos); //10
            writeCard16(0, bos); //11
            writeCard16(0, bos); //12
            writeCard16(4, bos); //13
            //end header

            for (PDFPage page : pages) {
                writeULong(objCount.get(page.pageIndex) - 1, bos);
            }
            for (PDFPage page : pages) {
                writeULong(pageLengths.get(page.pageIndex), bos);
            }
            for (PDFPage page : pages) {
                writeULong(contentStreamLengths.get(page.pageIndex), bos);
            }

            writeSharedTable(bos);

            for (Map.Entry group : hintGroups.entrySet()) {
                put(group.getKey(), bos.getCount());
                for (int i : group.getValue()) {
                    writeULong(i, bos);
                }
                if (group.getKey().equals("/C")) {
                    writeULong(0, bos);
                    writeCard16(0, bos);
                }
            }
        }

        private void writeSharedTable(CountingOutputStream bos) throws IOException {
            put("/S", bos.getCount());

            //Shared object hint table, header section
            writeULong(0, bos); //1
            writeULong(0, bos); //2
            writeULong(sharedLengths.size(), bos); //3
            writeULong(sharedLengths.size(), bos); //4
            writeCard16(0, bos); //5
            writeULong(0, bos); //6
            writeCard16(32, bos); //7

            for (int i : sharedLengths) {
                writeULong(i, bos);
            }
            writeULong(0, bos);
        }

        private void writeCard16(int s, OutputStream bos) throws IOException {
            byte b1 = (byte)((s >> 8) & 0xff);
            byte b2 = (byte)(s & 0xff);
            bos.write(b1);
            bos.write(b2);
        }

        private void writeULong(int s, OutputStream bos) throws IOException {
            byte b1 = (byte)((s >> 24) & 0xff);
            byte b2 = (byte)((s >> 16) & 0xff);
            byte b3 = (byte)((s >> 8) & 0xff);
            byte b4 = (byte)(s & 0xff);
            bos.write(b1);
            bos.write(b2);
            bos.write(b3);
            bos.write(b4);
        }
    }

    static class LinearPDFDictionary extends PDFDictionary {
        private int lastsize = -1;

        public LinearPDFDictionary(PDFDocument doc) {
            put("Linearized", 1);
            put("/L", 0);
            PDFArray larray = new PDFArray();
            larray.add(0);
            larray.add(0);
            put("/H", larray);
            doc.assignObjectNumber(this);
            getObjectNumber().getNumber();
            put("/O", getObjectNumber().getNumber() + 3);
            put("/E", 0);
            put("/N", doc.pageObjs.size());
            put("/T", 0);
        }

        public int output(OutputStream stream) throws IOException {
            int size = super.output(stream);
            int padding = lastsize - size + 32;
            if (lastsize == -1) {
                padding = 32;
                lastsize = size;
            }
            writePadding(padding, stream);
            return size + padding;
        }
    }


    private Set assignNumbers() throws IOException {
        Set page1Children = getPage1Children();
        if (!doc.pageObjs.isEmpty()) {
            for (int i = 1; i < doc.pageObjs.size(); i++) {
                PDFPage page = doc.pageObjs.get(i);
                Set children = pageObjsMap.get(page);
                for (PDFObject c : children) {
                    if (!page1Children.contains(c) && c.hasObjectNumber()) {
                        c.getObjectNumber().getNumber();
                    }
                }
            }
            for (PDFObject o : doc.objects) {
                if (o instanceof PDFDests || o instanceof PDFOutline) {
                    for (PDFObject c : getChildren(o)) {
                        c.getObjectNumber().getNumber();
                    }
                }
                if (o instanceof PDFInfo || o instanceof PDFPageLabels) {
                    o.getObjectNumber().getNumber();
                }
            }
            for (PDFObject o : doc.objects) {
                if (!page1Children.contains(o)) {
                    o.getObjectNumber().getNumber();
                }
            }
        }
        linearDict = new LinearPDFDictionary(doc);
        for (PDFObject o : page1Children) {
            o.getObjectNumber().getNumber();
        }
        sort(doc.objects);
        return page1Children;
    }

    private void sort(List objects) {
        Collections.sort(objects, new Comparator() {
            public int compare(PDFObject o1, PDFObject o2) {
                return Integer.compare(o1.getObjectNumber().getNumber(), o2.getObjectNumber().getNumber());
            }
        });
    }

    private Set getChildren(PDFObject o) {
        Set children = new LinkedHashSet();
        children.add(o);
        o.getChildren(children);
        return children;
    }

    public void outputPages(OutputStream stream) throws IOException {
        Collections.sort(doc.pageObjs, new Comparator() {
            public int compare(PDFPage o1, PDFPage o2) {
                return Integer.compare(o1.pageIndex, o2.pageIndex);
            }
        });
        doc.objects.addAll(doc.trailerObjects);
        doc.trailerObjects = null;
        if (doc.getStructureTreeElements() != null) {
            doc.objects.addAll(doc.getStructureTreeElements());
            doc.structureTreeElements = null;
        }
        for (int i = 0; i < doc.objects.size() * 2; i++) {
            doc.indirectObjectOffsets.add(0L);
        }
        Set page1Children = assignNumbers();
        doc.streamIndirectObject(linearDict, new ByteArrayOutputStream());
        for (PDFObject o : page1Children) {
            doc.objects.remove(o);
        }
        int sizeOfRest = doc.objects.size();

        ByteArrayOutputStream fakeHeaderTrailerStream = new ByteArrayOutputStream();
        long topTrailer = doc.position;
        doc.writeTrailer(fakeHeaderTrailerStream, sizeOfRest, page1Children.size() + 1,
                page1Children.size() + sizeOfRest + 1, Long.MAX_VALUE, 0);
        doc.position += fakeHeaderTrailerStream.size();

        ByteArrayOutputStream pageStream = new ByteArrayOutputStream();
        writeObjects(page1Children, pageStream, sizeOfRest + 1);
        long trailerOffset = doc.position;
        ByteArrayOutputStream footerTrailerStream = new ByteArrayOutputStream();
        doc.writeTrailer(footerTrailerStream, 0, sizeOfRest, sizeOfRest, 0, topTrailer);
        doc.position += footerTrailerStream.size();

        linearDict.put("/L", doc.position);

        PDFDocument.outputIndirectObject(linearDict, stream);
        CountingOutputStream realTrailer = new CountingOutputStream(stream);
        doc.writeTrailer(realTrailer, sizeOfRest, page1Children.size() + 1,
                page1Children.size() + sizeOfRest + 1, trailerOffset, 0);
        writePadding(fakeHeaderTrailerStream.size() - realTrailer.getCount(), stream);
        for (PDFObject o : page1Children) {
            PDFDocument.outputIndirectObject(o, stream);
            if (o instanceof HintTable) {
                break;
            }
        }
        stream.write(pageStream.toByteArray());
        stream.write(footerTrailerStream.toByteArray());
    }

    private Set getPage1Children() throws IOException {
        Set page1Children = new LinkedHashSet();
        if (!doc.pageObjs.isEmpty()) {
            PDFPage page1 = doc.pageObjs.get(0);
            page1Children.add(doc.getRoot());
            hintTable = new HintTable(doc);
            page1Children.add(hintTable);
            page1Children.add(page1);
            page1.getChildren(page1Children);
            doc.objects.remove(doc.getPages());
            doc.objects.add(0, doc.getPages());
            pageObjsMap.put(page1, page1Children);

            for (int i = 1; i < doc.pageObjs.size(); i++) {
                PDFPage page = doc.pageObjs.get(i);
                pageObjsMap.put(page, getChildren(page));
            }
        }
        return page1Children;
    }

    private static void writePadding(int padding, OutputStream stream) throws IOException {
        for (int i = 0; i < padding; i++) {
            stream.write(" ".getBytes("UTF-8"));
        }
    }

    private void writeObjects(Set children1, OutputStream pageStream, int sizeOfRest) throws IOException {
        writePage1(children1, pageStream);
        linearDict.put("/E", doc.position);
        for (PDFPage page : doc.pageObjs) {
            if (page.pageIndex != 0) {
                writePage(page, pageStream);
            }
        }
        while (!doc.objects.isEmpty()) {
            PDFObject o = doc.objects.remove(0);
            if (o instanceof PDFOutline) {
                writeObjectGroup("/O", getChildren(o), pageStream);
            } else if (o instanceof PDFDests) {
                writeObjectGroup("/E", getChildren(o), pageStream);
            } else if (o instanceof PDFInfo) {
                writeObjectGroup("/I", getChildren(o), pageStream);
            } else if (o instanceof PDFPageLabels) {
                writeObjectGroup("/L", getChildren(o), pageStream);
            } else if (o instanceof PDFStructTreeRoot) {
                writeObjectGroup("/C", getChildren(o), pageStream);
            } else {
                doc.streamIndirectObject(o, pageStream);
            }
        }
        linearDict.put("/T", doc.position + 8 + String.valueOf(sizeOfRest).length());
    }

    private void writeObjectGroup(String name, Set objects, OutputStream pageStream)
            throws IOException {
        List children = new ArrayList(objects);
        sort(children);

        int[] values = hintTable.hintGroups.get(name);
        values[0] = children.iterator().next().getObjectNumber().getNumber();
        values[1] = (int) doc.position;
        values[2] = children.size();
        for (PDFObject o : children) {
            values[3] += doc.streamIndirectObject(o, pageStream);
            doc.objects.remove(o);
        }
    }

    private void writePage1(Set children1, OutputStream pageStream) throws IOException {
        hintTable.pageStartPos = (int) doc.position;
        OutputStream stream = new ByteArrayOutputStream();

        Set sharedChildren = getSharedObjects();

        int page1Len = 0;
        int objCount = 0;
        int sharedCount = 0;
        for (PDFObject o : children1) {
            if (o instanceof HintTable) {
                PDFArray a = (PDFArray) linearDict.get("/H");
                a.set(0, doc.position);
                doc.streamIndirectObject(o, stream);
                a.set(1, doc.position - (Double)a.get(0));
                stream = pageStream;
            } else {
                int len = doc.streamIndirectObject(o, stream);
                if (o instanceof PDFStream && hintTable.contentStreamLengths.get(0) == 0) {
                    hintTable.contentStreamLengths.set(0, len);
                }
                if (!(o instanceof PDFRoot)) {
                    page1Len += len;
                    objCount++;
                }
                if (sharedChildren.contains(o)) {
                    hintTable.sharedLengths.set(sharedCount, len);
                    sharedCount++;
                }
            }
        }
        hintTable.pageLengths.set(0, page1Len);
        hintTable.objCount.set(0, objCount);
    }

    private Set getSharedObjects() {
        Set pageSharedChildren = getChildren(doc.pageObjs.get(0));
        for (int i = 0; i < pageSharedChildren.size(); i++) {
            hintTable.sharedLengths.add(0);
        }
        return pageSharedChildren;
    }

    private void writePage(PDFPage page, OutputStream pageStream) throws IOException {
        Set children = pageObjsMap.get(page);
        int pageLen = 0;
        int objCount = 0;
        for (PDFObject c : children) {
            if (doc.objects.contains(c)) {
                int len = doc.streamIndirectObject(c, pageStream);
                if (c instanceof PDFStream) {
                    hintTable.contentStreamLengths.set(page.pageIndex, len);
                }
                pageLen += len;
                doc.objects.remove(c);
                objCount++;
            }
        }
        hintTable.pageLengths.set(page.pageIndex, pageLen);
        hintTable.objCount.set(page.pageIndex, objCount);
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy