org.apache.pdfbox.pdmodel.common.PDPageLabels Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of pdfbox Show documentation
The Apache PDFBox library is an open source Java tool for working with PDF documents.
There is a newer version: 3.0.2
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.pdfbox.pdmodel.common;

import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.NoSuchElementException;
import java.util.TreeMap;

import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSInteger;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.PDDocument;

/**
 * Represents the page label dictionary of a document.
 * 
 * @author Igor Podolskiy
 */
public class PDPageLabels implements COSObjectable
{

    private Map labels;

    private PDDocument doc;

    /**
     * Creates an empty page label dictionary for the given document.
     * 
     * 
     * Note that the page label dictionary won't be automatically added to the
     * document; you will still need to do it manually (see
     * {@link org.apache.pdfbox.pdmodel.PDDocumentCatalog#setPageLabels(PDPageLabels)}.
     * 
     * 
     * @param document
     *            The document the page label dictionary is created for.
     * @see org.apache.pdfbox.pdmodel.PDDocumentCatalog#setPageLabels(PDPageLabels)
     */
    public PDPageLabels(PDDocument document)
    {
        labels = new TreeMap();
        this.doc = document;
        PDPageLabelRange defaultRange = new PDPageLabelRange();
        defaultRange.setStyle(PDPageLabelRange.STYLE_DECIMAL);
        labels.put(0, defaultRange);
    }

    /**
     * Creates an page label dictionary for a document using the information in
     * the given COS dictionary.
     * 
     * 
     * Note that the page label dictionary won't be automatically added to the
     * document; you will still need to do it manually (see
     * {@link org.apache.pdfbox.pdmodel.PDDocumentCatalog#setPageLabels(PDPageLabels)}.
     * 
     * 
     * @param document
     *            The document the page label dictionary is created for.
     * @param dict
     *            an existing page label dictionary
     * @see org.apache.pdfbox.pdmodel.PDDocumentCatalog#setPageLabels(PDPageLabels)
     * @throws IOException
     *             If something goes wrong during the number tree conversion.
     */
    public PDPageLabels(PDDocument document, COSDictionary dict) throws IOException
    {
        this(document);
        if (dict == null)
        {
            return;
        }
        PDNumberTreeNode root = new PDNumberTreeNode(dict, COSDictionary.class);
        findLabels(root);
    }
    
    private void findLabels(PDNumberTreeNode node) throws IOException 
    {
        if (node.getKids() != null) 
        {
            List kids = node.getKids();
            for (PDNumberTreeNode kid : kids) 
            {
                findLabels(kid);
            }
        }
        else if (node.getNumbers() != null) 
        {
            Map numbers = node.getNumbers();
            for (Entry i : numbers.entrySet())
            {
                if(i.getKey() >= 0)
                {
                    labels.put(i.getKey(), new PDPageLabelRange((COSDictionary)i.getValue()));
                }
            }
        }
    }


    /**
     * Returns the number of page label ranges.
     * 
     * 
     * This will be always >= 1, as the required default entry for the page
     * range starting at the first page is added automatically by this
     * implementation (see PDF32000-1:2008, p. 375).
     * 
     * 
     * @return the number of page label ranges.
     */
    public int getPageRangeCount()
    {
        return labels.size();
    }

    /**
     * Returns the page label range starting at the given page, or {@code null}
     * if no such range is defined.
     * 
     * @param startPage
     *            the 0-based page index representing the start page of the page
     *            range the item is defined for.
     * @return the page label range or {@code null} if no label range is defined
     *         for the given start page.
     */
    public PDPageLabelRange getPageLabelRange(int startPage)
    {
        return labels.get(startPage);
    }

    /**
     * Sets the page label range beginning at the specified start page.
     * 
     * @param startPage
     *            the 0-based index of the page representing the start of the
     *            page label range.
     * @param item
     *            the page label item to set.
     * @throws IllegalArgumentException if the startPage parameter is < 0.
     */
    public void setLabelItem(int startPage, PDPageLabelRange item)
    {
        if (startPage < 0)
        {
            throw new IllegalArgumentException("startPage parameter of setLabelItem may not be < 0");
        }
        labels.put(startPage, item);
    }
    
    /**
     * {@inheritDoc} 
     */
    @Override
    public COSBase getCOSObject()
    {
        COSDictionary dict = new COSDictionary();
        COSArray arr = new COSArray();
        for (Entry i : labels.entrySet())
        {
            arr.add(COSInteger.get(i.getKey()));
            arr.add(i.getValue());
        }
        dict.setItem(COSName.NUMS, arr);
        return dict;
    }

    /**
     * Returns a mapping with computed page labels as keys and corresponding
     * 0-based page indices as values. The returned map will contain at most as
     * much entries as the document has pages.
     * 
     * 
     * NOTE: If the document contains duplicate page labels,
     * the returned map will contain less entries than the document has
     * pages. The page index returned in this case is the highest index
     * among all pages sharing the same label.
     * 
     * 
     * @return a mapping from labels to 0-based page indices.
     */
    public Map getPageIndicesByLabels()
    {
        final Map labelMap = 
            new HashMap(doc.getNumberOfPages());
        computeLabels(new LabelHandler()
        {
            @Override
            public void newLabel(int pageIndex, String label)
            {
                labelMap.put(label, pageIndex);
            }
        });
        return labelMap;
    }

    /**
     * Returns a mapping with 0-based page indices as keys and corresponding
     * page labels as values as an array. The array will have exactly as much
     * entries as the document has pages.
     * 
     * @return an array mapping from 0-based page indices to labels.
     */
    public String[] getLabelsByPageIndices()
    {
        final String[] map = new String[doc.getNumberOfPages()];
        computeLabels(new LabelHandler()
        {
            @Override
            public void newLabel(int pageIndex, String label)
            {
                if(pageIndex < doc.getNumberOfPages())
                { 
                    map[pageIndex] = label;
                }
            }
        });
        return map;
    }

    /**
     * Internal interface for the control flow support.
     * 
     * @author Igor Podolskiy
     */
    private interface LabelHandler
    {
        void newLabel(int pageIndex, String label);
    }

    private void computeLabels(LabelHandler handler)
    {
        Iterator> iterator = 
            labels.entrySet().iterator();
        if (!iterator.hasNext())
        {
            return;
        }
        int pageIndex = 0;
        Entry lastEntry = iterator.next();
        while (iterator.hasNext())
        {
            Entry entry = iterator.next();
            int numPages = entry.getKey() - lastEntry.getKey();
            LabelGenerator gen = new LabelGenerator(lastEntry.getValue(),
                    numPages);
            while (gen.hasNext())
            {
                handler.newLabel(pageIndex, gen.next());
                pageIndex++;
            }
            lastEntry = entry;
        }
        LabelGenerator gen = new LabelGenerator(lastEntry.getValue(), 
                doc.getNumberOfPages() - lastEntry.getKey());
        while (gen.hasNext())
        {
            handler.newLabel(pageIndex, gen.next());
            pageIndex++;
        }
    }

    /**
     * Generates the labels in a page range.
     * 
     * @author Igor Podolskiy
     * 
     */
    private static class LabelGenerator implements Iterator
    {
        private final PDPageLabelRange labelInfo;
        private final int numPages;
        private int currentPage;

        LabelGenerator(PDPageLabelRange label, int pages)
        {
            this.labelInfo = label;
            this.numPages = pages;
            this.currentPage = 0;
        }

        @Override
        public boolean hasNext()
        {
            return currentPage < numPages;
        }

        @Override
        public String next()
        {
            if (!hasNext())
            {
                throw new NoSuchElementException();
            }
            StringBuilder buf = new StringBuilder();
            if (labelInfo.getPrefix() != null)
            {
                String label = labelInfo.getPrefix();
                // there may be some labels with some null bytes at the end
                // which will lead to an incomplete output, see PDFBOX-1047
                while (label.lastIndexOf(0) != -1)
                {
                    label = label.substring(0, label.length()-1);
                }
                buf.append(label);
            }
            if (labelInfo.getStyle() != null)
            {
                buf.append(getNumber(labelInfo.getStart() + currentPage,
                        labelInfo.getStyle()));
            }
            currentPage++;
            return buf.toString();
        }

        private String getNumber(int pageIndex, String style)
        {
            if (PDPageLabelRange.STYLE_DECIMAL.equals(style))
            {
                return Integer.toString(pageIndex);
            }
            else if (PDPageLabelRange.STYLE_LETTERS_LOWER.equals(style))
            {
                return makeLetterLabel(pageIndex);
            }
            else if (PDPageLabelRange.STYLE_LETTERS_UPPER.equals(style))
            {
                return makeLetterLabel(pageIndex).toUpperCase();
            }
            else if (PDPageLabelRange.STYLE_ROMAN_LOWER.equals(style))
            {
                return makeRomanLabel(pageIndex);
            }
            else if (PDPageLabelRange.STYLE_ROMAN_UPPER.equals(style))
            {
                return makeRomanLabel(pageIndex).toUpperCase();
            }
            else
            {
                // Fall back to decimals.
                return Integer.toString(pageIndex);
            }
        }

        /**
         * Lookup table used by the {@link #makeRomanLabel(int)} method.
         */
        private static final String[][] ROMANS = new String[][] {
            { "", "i", "ii", "iii", "iv", "v", "vi", "vii", "viii", "ix" },
            { "", "x", "xx", "xxx", "xl", "l", "lx", "lxx", "lxxx", "xc" },
            { "", "c", "cc", "ccc", "cd", "d", "dc", "dcc", "dccc", "cm" }
        };

        private static String makeRomanLabel(int pageIndex)
        {
            StringBuilder buf = new StringBuilder();
            int power = 0;
            while (power < 3 && pageIndex > 0)
            {
                buf.insert(0, ROMANS[power][pageIndex % 10]);
                pageIndex /= 10;
                power++;
            }
            // Prepend as many m as there are thousands (which is
            // incorrect by the roman numeral rules for numbers > 3999,
            // but is unbounded and Adobe Acrobat does it this way).
            // This code is somewhat inefficient for really big numbers,
            // but those don't occur too often (and the numbers in those cases
            // would be incomprehensible even if we and Adobe
            // used strict Roman rules).
            for (int i = 0; i < pageIndex; i++)
            {
                buf.insert(0, 'm');
            }
            return buf.toString();
        }

        /**
         * a..z, aa..zz, aaa..zzz ... labeling as described in PDF32000-1:2008,
         * Table 159, Page 375.
         */
        private static String makeLetterLabel(int num)
        {
            StringBuilder buf = new StringBuilder();
            int numLetters = num / 26 + Integer.signum(num % 26);
            int letter = num % 26 + 26 * (1 - Integer.signum(num % 26)) + 'a' - 1;
            for (int i = 0; i < numLetters; i++)
            {
                buf.appendCodePoint(letter);
            }
            return buf.toString();
        }

        @Override
        public void remove()
        {
            // This is a generator, no removing allowed.
            throw new UnsupportedOperationException();
        }
    }
}