All Downloads are FREE. Search and download functionalities are using the official Maven repository.

edu.harvard.hul.ois.jhove.module.pdf.NameTreeNode Maven / Gradle / Ivy

/**********************************************************************
 * Jhove - JSTOR/Harvard Object Validation Environment
 * Copyright 2003 by JSTOR and the President and Fellows of Harvard College
 **********************************************************************/

package edu.harvard.hul.ois.jhove.module.pdf;

import edu.harvard.hul.ois.jhove.module.PdfModule;

import java.io.IOException;
import java.util.*;

/**
 *  Class for nodes of a PDF name tree, e.g., ExternalFiles.  Name trees
 *  are intended for large amounts of data that won't have to all be brought
 *  into memory at once; so this implementation is geared toward file-based
 *  lookup of a key rather than creating an in-memory structure.  Descendant
 *  nodes become available for garbage collection if they are not on the
 *  search path for a match.
 *
 *  Keys are collated according to raw bytes, not character encoding.
 *
 *  Because child nodes are resolved from the file on every lookup, a
 *  damaged or hostile file can make a Kids entry point back at one of its
 *  own ancestors.  To keep such a file from recursing until the stack
 *  overflows, each node records its depth below the root and {@link #get}
 *  refuses to descend beyond {@code MAX_DEPTH} levels.
 */
public class NameTreeNode 
{
    /**
     *  Deepest nesting of nodes that {@link #get} will follow.  The value
     *  is deliberately generous for any sanely built tree while staying
     *  shallow enough that the recursion cannot exhaust the stack.
     */
    private static final int MAX_DEPTH = 256;

    protected PdfModule _module;
    protected NameTreeNode _parent;
    protected PdfDictionary _dict;  // dictionary which defines this node

    private Vector _kids = null;           // content of the Kids array, or null if absent
    private Vector _names = null;          // content of the Names array, or null if absent
    private Vector _lowerLimit = null;     // Lower limit of keys for this node -- null for root
    private Vector _upperLimit = null;     // Upper limit of keys for this node -- null for root
    private final int _depth;              // Number of ancestors above this node; 0 when parent is null
    

    /**
     *  Constructor.
     *  @param module     The PdfModule under which we're operating
     *  @param parent     The parent node in the document tree;
     *                    may be null only for the root node
     *  @param dict       The dictionary object on which this node
     *                    is based
     *  @throws PdfException  if the Limits, Kids or Names entries have an
     *                    unexpected type or shape, or cannot be read
     */
    public NameTreeNode (PdfModule module,
                NameTreeNode parent, 
                PdfDictionary dict) throws PdfException
    {
        _module = module;
        _parent = parent;
        _dict = dict;
        _depth = (parent == null) ? 0 : parent._depth + 1;
        
        try {
            // Get the limits of the key range.  If there are no limits, this
            // must be the root node.
            PdfArray limitsDict = (PdfArray) module.resolveIndirectObject
                (dict.get ("Limits"));
            if (limitsDict != null) {
                Vector vec = limitsDict.getContent ();
                PdfSimpleObject limobj = (PdfSimpleObject) vec.elementAt (0);
                _lowerLimit = limobj.getRawBytes ();
                limobj = (PdfSimpleObject) vec.elementAt (1);
                _upperLimit = limobj.getRawBytes ();
            }
            // Get the Kids and Names arrays.  Normally only one will
            // be present.
            // Note: the PDF 1.6 spec is more specific:
            // Root Node: Single entry, either Kids or Names, not both
            // Intermediate Node: MUST have Kids and Limits
            // Leaf Node: MUST have Names and Limits
            PdfArray kidsVec = (PdfArray) module.resolveIndirectObject
                (dict.get ("Kids"));
            if (kidsVec != null) {
                _kids = kidsVec.getContent ();
            }
            PdfArray namesVec = (PdfArray) module.resolveIndirectObject
                (dict.get ("Names"));
            if (namesVec != null) {
                _names = namesVec.getContent ();
            }
        }
        catch (ClassCastException ce) {
            // An entry resolved to something other than the expected type.
            throw new PdfInvalidException (MessageConstants.PDF_HUL_12); // PDF-HUL-12
        }
        catch (ArrayIndexOutOfBoundsException | NullPointerException ce) {
            // Limits array too short, or a required object was missing.
            throw new PdfInvalidException (MessageConstants.PDF_HUL_13); // PDF-HUL-13
        }
        catch (IOException e) {
            throw new PdfMalformedException (MessageConstants.PDF_HUL_14); // PDF-HUL-14
        }
    }

    /**
     * See if a key is within the bounds of this node.  All keys
     * are within the bounds of the root node.
     *
     * @param key   The key to test, as a Vector of Integer byte values
     * @return      true if this node has no lower or no upper limit, or
     *              if the key lies between the two limits inclusive
     */
    public boolean inBounds (Vector key) 
    {
        if (_lowerLimit == null) {
            return true;    // root node
        }
        if (_upperLimit == null) {
            return true;    // no upper limit is specified
        }
        return compareKey (key, _lowerLimit) >= 0
                && compareKey (key, _upperLimit) <= 0;
    }
    
    
    /** 
     *  Get the PdfObject which matches the key, or null if there is no match.
     *
     *  @param key   The key to look up, as a Vector of Integer byte values
     *  @return      The resolved value stored under the key, or null if
     *               the key is out of bounds or not present
     *  @throws PdfException  if the node has neither Names nor Kids, if
     *               the tree is nested more than {@code MAX_DEPTH} levels
     *               deep (as happens when Kids entries form a cycle), or
     *               if the node's content cannot be read or has an
     *               unexpected type or shape
     */
    public PdfObject get (Vector key) throws PdfException
    {
        try {
            if (!inBounds (key)) {
                return null;
            }
            // If this has a Names array, it's a leaf node or standalone root;
            // search it for the key.
            if (_names != null) {
                return searchNames (key);
            }
            // Otherwise it's a non-standalone root or an intermediate node.
            if (_kids != null) {
                return searchKids (key);
            }
            throw new PdfMalformedException (MessageConstants.PDF_HUL_15); // PDF-HUL-15
        }
        catch (IOException | ArrayIndexOutOfBoundsException | NullPointerException | ClassCastException e) {
            throw new PdfMalformedException (MessageConstants.PDF_HUL_16); // PDF-HUL-16
        }
    }

    /*  Scan the Names array, which alternates key, value, key, value...,
        for the given key.  The scan stops early once an entry greater
        than the key is seen, so it relies on the entries being in
        ascending key order. */
    private PdfObject searchNames (Vector key)
                throws PdfException, IOException
    {
        for (int i = 0; i < _names.size (); i += 2) {
            PdfSimpleObject entryKey = (PdfSimpleObject) _names.elementAt (i);
            int cmp = compareKey (key, entryKey.getRawBytes ());
            if (cmp == 0) {
                /* Match! */
                return _module.resolveIndirectObject
                    ((PdfObject) _names.elementAt (i + 1));
            }
            if (cmp < 0) {
                // Passed position where match should be
                return null;
            }
        }
        return null;     // just not there
    }

    /*  Search each child node whose bounds include the key, returning the
        first non-null result.  Refuses to descend past MAX_DEPTH so that a
        Kids entry referring back to an ancestor cannot recurse forever. */
    private PdfObject searchKids (Vector key)
                throws PdfException, IOException
    {
        if (_depth >= MAX_DEPTH) {
            // Reuses the message get() reports for other lookup failures.
            throw new PdfMalformedException (MessageConstants.PDF_HUL_16); // PDF-HUL-16
        }
        for (int i = 0; i < _kids.size (); i++) {
            PdfDictionary kid = (PdfDictionary)
                _module.resolveIndirectObject (
                    (PdfObject) _kids.elementAt (i));
            NameTreeNode kidnode = new NameTreeNode (_module, this, kid);
            if (kidnode.inBounds (key)) {
                PdfObject res = kidnode.get (key);
                if (res != null) {
                    return res;
                }
            }
        }
        return null;    // Not in any subnode
    }

    /*  Compare two keys (Vectors of Integer).  Returns -1 if the
        first argument is less than the second, 1 if the first argument
        is greater, and 0 if they are equal.  Key A is less than key B
        if A is a proper prefix of B. */
    private static int compareKey (Vector a, Vector b) {
        int lena = a.size ();
        int lenb = b.size ();
        int len = Math.min (lena, lenb);
        for (int i = 0; i < len; i++) {
            int ai = ((Integer) a.elementAt (i)).intValue ();
            int bi = ((Integer) b.elementAt (i)).intValue ();
            if (ai < bi) {
                return -1;
            }
            if (ai > bi) {
                return 1;
            }
        }
        // Both are equal as far as the length of the shorter one goes.
        // To be equal, they must have the same length; otherwise the
        // shorter one is the lesser.
        if (lena == lenb) {
            return 0;
        }
        return (lena < lenb) ? -1 : 1;
    }
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy