// edu.harvard.hul.ois.jhove.module.pdf.PageObject (Maven / Gradle / Ivy)

/**********************************************************************
 * Jhove - JSTOR/Harvard Object Validation Environment
 * Copyright 2003 by JSTOR and the President and Fellows of Harvard College
 **********************************************************************/

package edu.harvard.hul.ois.jhove.module.pdf;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Vector;

import edu.harvard.hul.ois.jhove.messages.JhoveMessage;
import edu.harvard.hul.ois.jhove.module.PdfModule;

/**
 *  Class encapsulating a PDF page object node.  A page object is a
 *  leaf of the document's page tree; besides the behavior inherited
 *  from DocNode it gives access to the page's content streams, its
 *  annotations and its ArtBox, TrimBox and BleedBox rectangles.
 */
public class PageObject extends DocNode 
{
    /** Content streams of the page, filled in by loadContent; stays
     *  null when the page has no Contents entry or the Contents
     *  array is empty. */
    private List<PdfStream> _contentStreams = null;

    /**
     *  Creates a page object node and marks it as a page object
     *  (as opposed to an intermediate page tree node).
     *  @param module     The module under which we're operating
     *  @param parent     The parent node in the document tree;
     *                    may be null only for the root node
     *  @param dict       The dictionary object on which this node
     *                    is based
     *  @throws PdfMalformedException  propagated from the superclass
     *                    constructor
     */
    public PageObject (PdfModule module,
                PageTreeNode parent, 
                PdfDictionary dict) throws PdfMalformedException
    {
        super (module, parent, dict);
        _pageObjectFlag = true;
    }

    /**
     *  Find the content stream(s) for this page.  This is
     *  called when the page tree content stream is built
     *  by PageTreeNode.  getContentStreams may 
     *  subsequently be called to get the content.
     *
     *  @param module     The module used to resolve indirect objects
     *  @throws PdfException  PDF-HUL-26 if an I/O error occurs while
     *                    resolving the content, PDF-HUL-27 if the
     *                    Contents entry is neither a stream nor an
     *                    array, PDF-HUL-28 if an element of the
     *                    Contents array is not a stream
     */
    public void loadContent (PdfModule module) throws PdfException
    {
        PdfObject contents = _dict.get("Contents");
        // Contents object can be null, the page is empty.
        if (contents == null) {
            return;
        }
        try {
            contents = module.resolveIndirectObject (contents);
            processContents(module, contents);
        }
        catch (IOException e) {
            throw new PdfMalformedException (MessageConstants.PDF_HUL_26, 0); // PDF-HUL-26
        }
    }

    /**
     *  Returns the List of content streams collected by loadContent.
     *  The result is null if loadContent has not stored any stream,
     *  i.e. the page has no Contents entry or an empty Contents array.
     *  The returned list is the internal one, not a copy.
     */
    public List<PdfStream> getContentStreams ()
    {
        return _contentStreams;
    }
    
    /**
     *  Return the page's Annots array of dictionaries, or null if none.
     *
     *  @throws PdfException  PDF-HUL-21 if a ClassCastException occurs
     *                    (typically because the resolved Annots value
     *                    is not an array), PDF-HUL-22 if an I/O error
     *                    occurs while resolving it
     */
    public PdfArray getAnnotations () throws PdfException
    {
        try {
            return (PdfArray) _module.resolveIndirectObject (_dict.get ("Annots"));
        }
        catch (ClassCastException e) {
            throw new PdfInvalidException (MessageConstants.PDF_HUL_21); // PDF-HUL-21
        }
        catch (IOException e) {
            throw new PdfMalformedException (MessageConstants.PDF_HUL_22); // PDF-HUL-22
        }
    }


    /**
     *  Call this function when recursively walking through a document
     *  tree.  This allows nextPageObject () to return this object
     *  exactly once.
     */
    @Override
    public void startWalk ()
    {
        _walkFinished = false;
    }
    
    /**
     *  Returns this object the first time it is called after startWalk
     *  is called, then null when called again.  This allows a recursive
     *  walk through a document tree to work properly.
     */
    @Override
    public PageObject nextPageObject ()
    {
        if (_walkFinished) {
            return null;
        }
        _walkFinished = true;
        return this;
    }

    /**
     *  Called to walk through all page tree nodes and page objects.
     *  Functionally identical with nextPageObject.
     */
    @Override
    public DocNode nextDocNode ()
    {
        return nextPageObject ();
    }
    
    /**
     *  Returns the ArtBox for the page, or null if none.  Throws a
     *  PdfException if there is an ArtBox but it is not a rectangle.
     */
    public PdfArray getArtBox () throws PdfException
    {
        return retrieveAndCheckRectangle(this._dict, "ArtBox",
                MessageConstants.PDF_HUL_23); // PDF-HUL-23
    }

    /**
     *  Returns the TrimBox for the page, or null if none.  Throws a
     *  PdfException if there is a TrimBox but it is not a rectangle.
     */
    public PdfArray getTrimBox () throws PdfException
    {
        return retrieveAndCheckRectangle(this._dict, "TrimBox",
                MessageConstants.PDF_HUL_24); // PDF-HUL-24
    }

    /**
     *  Returns the BleedBox for the page, or null if none.  Throws a
     *  PdfException if there is a BleedBox but it is not a rectangle.
     */
    public PdfArray getBleedBox () throws PdfException
    {
        return retrieveAndCheckRectangle(this._dict, "BleedBox",
                MessageConstants.PDF_HUL_25); // PDF-HUL-25
    }

    /**
     *  Looks up a box entry in a dictionary and verifies that it is
     *  a rectangle.  The entry is read directly from the dictionary;
     *  no indirect reference is resolved here.
     *
     *  @param dict            The dictionary to read from
     *  @param dictKey         The key of the box entry
     *  @param invalidMessage  The message for the exception thrown when
     *                         the entry is present but unusable
     *  @return the array stored under dictKey, or null if there is no
     *          such entry
     *  @throws PdfInvalidException  if the entry is not a PdfArray, or
     *          is an array for which toRectangle () returns null
     */
    private static PdfArray retrieveAndCheckRectangle(final PdfDictionary dict,
            final String dictKey, final JhoveMessage invalidMessage) throws PdfInvalidException {
        final PdfObject entry = dict.get (dictKey);
        if (entry == null) {
            // If it's null it doesn't exist so return null
            return null;
        }
        if (!(entry instanceof PdfArray)) {
            // Present, but not even an array
            throw new PdfInvalidException (invalidMessage);
        }
        final PdfArray box = (PdfArray) entry;
        if (box.toRectangle () == null) {
            // The retrieved array isn't a rectangle
            throw new PdfInvalidException (invalidMessage);
        }
        return box;
    }

    /**
     *  Stores the page content found in an already resolved Contents
     *  value.  The Contents entry in the dictionary may be either a
     *  stream or an array of streams; anything else is rejected.
     *
     *  @param module    The module used to resolve array elements
     *  @param contents  The resolved Contents value
     *  @throws PdfException  PDF-HUL-27 if contents is neither a stream
     *                   nor an array, PDF-HUL-28 if an array element
     *                   is not a stream
     *  @throws IOException   propagated from resolving array elements
     */
    private void processContents(PdfModule module, final PdfObject contents) throws PdfException, IOException {
        if (contents instanceof PdfStream) {
            _contentStreams = new ArrayList<>(1);
            _contentStreams.add((PdfStream) contents);
        }
        else if (contents instanceof PdfArray) {
            loadContentFromArray(module, (PdfArray) contents);
        }
        else {
            throw new PdfInvalidException (MessageConstants.PDF_HUL_27, 0); // PDF-HUL-27
        }
    }

    /**
     *  Resolves every element of a Contents array and collects the
     *  resulting streams, in array order.  An empty array leaves the
     *  stored content streams untouched.  If an element is rejected,
     *  the streams collected before it remain stored.
     *
     *  @param module    The module used to resolve indirect objects
     *  @param contents  The Contents array
     *  @throws PdfException  PDF-HUL-28 if an element does not resolve
     *                   to a stream
     *  @throws IOException   propagated from resolving an element
     */
    private void loadContentFromArray(PdfModule module, final PdfArray contents) throws PdfException, IOException {
        final Vector<PdfObject> elements = contents.getContent ();
        if (elements.isEmpty ()) {
            return;
        }
        _contentStreams = new ArrayList<>(elements.size ());
        for (int i = 0; i < elements.size (); i++) {
            final PdfObject resolved =
                module.resolveIndirectObject (elements.elementAt (i));
            if (!(resolved instanceof PdfStream)) {
                throw new PdfInvalidException (MessageConstants.PDF_HUL_28, 0); // PDF-HUL-28
            }
            _contentStreams.add ((PdfStream) resolved);
        }
    }
}