All downloads are free. Search and download functionality uses the official Maven repository.

com.sun.pdfview.PDFObject Maven / Gradle / Ivy

/*
 * $Id: PDFObject.java,v 1.8 2009/03/15 20:47:38 tomoke Exp $
 *
 * Copyright 2004 Sun Microsystems, Inc., 4150 Network Circle,
 * Santa Clara, California 95054, U.S.A. All rights reserved.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 * 
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
package com.sun.pdfview;

import java.io.IOException;
import java.lang.ref.SoftReference;
import java.nio.ByteBuffer;
import java.util.*;

import com.sun.pdfview.decode.PDFDecoder;
import com.sun.pdfview.decrypt.PDFDecrypter;
import com.sun.pdfview.decrypt.IdentityDecrypter;

/**
 * a class encapsulating all the possibilities of content for
 * an object in a PDF file.
 * 

* A PDF object can be a simple type, like a Boolean, a Number, * a String, or the Null value. It can also be a NAME, which * looks like a string, but is a special type in PDF files, like * "/Name". *

* A PDF object can also be complex types, including Array; * Dictionary; Stream, which is a Dictionary plus an array of * bytes; or Indirect, which is a reference to some other * PDF object. Indirect references will always be dereferenced * by the time any data is returned from one of the methods * in this class. * * @author Mike Wessler */ public class PDFObject { /** an indirect reference*/ public static final int INDIRECT = 0; // PDFXref /** a Boolean */ public static final int BOOLEAN = 1; // Boolean /** a Number, represented as a double */ public static final int NUMBER = 2; // Double /** a String */ public static final int STRING = 3; // String /** a special string, seen in PDF files as /Name */ public static final int NAME = 4; // String /** an array of PDFObjects */ public static final int ARRAY = 5; // Array of PDFObject /** a Hashmap that maps String names to PDFObjects */ public static final int DICTIONARY = 6; // HashMap(String->PDFObject) /** a Stream: a Hashmap with a byte array */ public static final int STREAM = 7; // HashMap + byte[] /** the NULL object (there is only one) */ public static final int NULL = 8; // null /** a special PDF bare word, like R, obj, true, false, etc */ public static final int KEYWORD = 9; // String /** * When a value of {@link #getObjGen objNum} or {@link #getObjGen objGen}, * indicates that the object is not top-level, and is embedded in another * object */ public static final int OBJ_NUM_EMBEDDED = -2; /** * When a value of {@link #getObjGen objNum} or {@link #getObjGen objGen}, * indicates that the object is not top-level, and is embedded directly * in the trailer. */ public static final int OBJ_NUM_TRAILER = -1; /** the NULL PDFObject */ public static final PDFObject nullObj = new PDFObject(null, NULL, null); /** the type of this object */ private int type; /** the value of this object. 
It can be a wide number of things, defined by type */ private Object value; /** the encoded stream, if this is a STREAM object */ private ByteBuffer stream; /** a cached version of the decoded stream */ private SoftReference decodedStream; /** * the PDFFile from which this object came, used for * dereferences */ private PDFFile owner; /** * a cache of translated data. This data can be * garbage collected at any time, after which it will * have to be rebuilt. */ private SoftReference cache; /** @see #getObjNum() */ private int objNum = OBJ_NUM_EMBEDDED; /** @see #getObjGen() */ private int objGen = OBJ_NUM_EMBEDDED; /** * create a new simple PDFObject with a type and a value * @param owner the PDFFile in which this object resides, used * for dereferencing. This may be null. * @param type the type of object * @param value the value. For DICTIONARY, this is a HashMap. * for ARRAY it's an ArrayList. For NUMBER, it's a Double. * for BOOLEAN, it's Boolean.TRUE or Boolean.FALSE. For * everything else, it's a String. */ public PDFObject(PDFFile owner, int type, Object value) { this.type = type; if (type == NAME) { value = ((String) value).intern(); } else if (type == KEYWORD && value.equals("true")) { this.type = BOOLEAN; value = Boolean.TRUE; } else if (type == KEYWORD && value.equals("false")) { this.type = BOOLEAN; value = Boolean.FALSE; } this.value = value; this.owner = owner; } /** * create a new PDFObject that is the closest match to a * given Java object. Possibilities include Double, String, * PDFObject[], HashMap, Boolean, or PDFParser.Tok, * which should be "true" or "false" to turn into a BOOLEAN. * * @param obj the sample Java object to convert to a PDFObject. * @throws PDFParseException if the object isn't one of the * above examples, and can't be turned into a PDFObject. 
*/ public PDFObject(Object obj) throws PDFParseException { this.owner = null; this.value = obj; if ((obj instanceof Double) || (obj instanceof Integer)) { this.type = NUMBER; } else if (obj instanceof String) { this.type = NAME; } else if (obj instanceof PDFObject[]) { this.type = ARRAY; } else if (obj instanceof Object[]) { Object[] srcary = (Object[]) obj; PDFObject[] dstary = new PDFObject[srcary.length]; for (int i = 0; i < srcary.length; i++) { dstary[i] = new PDFObject(srcary[i]); } value = dstary; this.type = ARRAY; } else if (obj instanceof HashMap) { this.type = DICTIONARY; } else if (obj instanceof Boolean) { this.type = BOOLEAN; } else if (obj instanceof PDFParser.Tok) { PDFParser.Tok tok = (PDFParser.Tok) obj; if (tok.name.equals("true")) { this.value = Boolean.TRUE; this.type = BOOLEAN; } else if (tok.name.equals("false")) { this.value = Boolean.FALSE; this.type = BOOLEAN; } else { this.value = tok.name; this.type = NAME; } } else { throw new PDFParseException("Bad type for raw PDFObject: " + obj); } } /** * create a new PDFObject based on a PDFXref * @param owner the PDFFile from which the PDFXref was drawn * @param xref the PDFXref to turn into a PDFObject */ public PDFObject(PDFFile owner, PDFXref xref) { this.type = INDIRECT; this.value = xref; this.owner = owner; } /** * get the type of this object. The object will be * dereferenced, so INDIRECT will never be returned. * @return the type of the object */ public int getType() throws IOException { if (type == INDIRECT) { return dereference().getType(); } return type; } /** * set the stream of this object. It should have been * a DICTIONARY before the call. * @param data the data, as a ByteBuffer. */ public void setStream(ByteBuffer data) { this.type = STREAM; this.stream = data; } /** * get the value in the cache. May become null at any time. * @return the cached value, or null if the value has been * garbage collected. 
*/ public Object getCache() throws IOException { if (type == INDIRECT) { return dereference().getCache(); } else if (cache != null) { return cache.get(); } else { return null; } } /** * set the cached value. The object may be garbage collected * if no other reference exists to it. * @param obj the object to be cached */ public void setCache(Object obj) throws IOException { if (type == INDIRECT) { dereference().setCache(obj); return; } else { cache = new SoftReference(obj); } } /** * get the stream from this object. Will return null if this * object isn't a STREAM. * @return the stream, or null, if this isn't a STREAM. */ public byte[] getStream() throws IOException { if (type == INDIRECT) { return dereference().getStream(); } else if (type == STREAM && stream != null) { byte[] data = null; synchronized (stream) { // decode ByteBuffer streamBuf = decodeStream(); // ByteBuffer streamBuf = stream; // First try to use the array with no copying. This can only // be done if the buffer has a backing array, and is not a slice if (streamBuf.hasArray() && streamBuf.arrayOffset() == 0) { byte[] ary = streamBuf.array(); // make sure there is no extra data in the buffer if (ary.length == streamBuf.remaining()) { return ary; } } // Can't use the direct buffer, so copy the data (bad) data = new byte[streamBuf.remaining()]; streamBuf.get(data); // return the stream to its starting position streamBuf.flip(); } return data; } else if (type == STRING) { return PDFStringUtil.asBytes(getStringValue()); } // wrong type return null; } /** * get the stream from this object as a byte buffer. Will return null if * this object isn't a STREAM. * @return the buffer, or null, if this isn't a STREAM. 
*/ public ByteBuffer getStreamBuffer() throws IOException { if (type == INDIRECT) { return dereference().getStreamBuffer(); } else if (type == STREAM && stream != null) { synchronized (stream) { ByteBuffer streamBuf = decodeStream(); // ByteBuffer streamBuf = stream; return streamBuf.duplicate(); } } else if (type == STRING) { String src = getStringValue(); return ByteBuffer.wrap(src.getBytes()); } // wrong type return null; } /** * Get the decoded stream value */ private ByteBuffer decodeStream() throws IOException { ByteBuffer outStream = null; // first try the cache if (decodedStream != null) { outStream = (ByteBuffer) decodedStream.get(); } // no luck in the cache, do the actual decoding if (outStream == null) { stream.rewind(); outStream = PDFDecoder.decodeStream(this, stream); decodedStream = new SoftReference(outStream); } return outStream; } /** * get the value as an int. Will return 0 if this object * isn't a NUMBER. */ public int getIntValue() throws IOException { if (type == INDIRECT) { return dereference().getIntValue(); } else if (type == NUMBER) { return ((Double) value).intValue(); } // wrong type return 0; } /** * get the value as a float. Will return 0 if this object * isn't a NUMBER */ public float getFloatValue() throws IOException { if (type == INDIRECT) { return dereference().getFloatValue(); } else if (type == NUMBER) { return ((Double) value).floatValue(); } // wrong type return 0; } /** * get the value as a double. Will return 0 if this object * isn't a NUMBER. */ public double getDoubleValue() throws IOException { if (type == INDIRECT) { return dereference().getDoubleValue(); } else if (type == NUMBER) { return ((Double) value).doubleValue(); } // wrong type return 0; } /** * get the value as a String. Will return null if the object * isn't a STRING, NAME, or KEYWORD. This method will NOT * convert a NUMBER to a String. 
If the string is actually * a text string (i.e., may be encoded in UTF16-BE or PdfDocEncoding), * then one should use {@link #getTextStringValue()} or use one * of the {@link PDFStringUtil} methods on the result from this * method. The string value represents exactly the sequence of 8 bit * characters present in the file, decrypted and decoded as appropriate, * into a string containing only 8 bit character values - that is, each * char will be between 0 and 255. */ public String getStringValue() throws IOException { if (type == INDIRECT) { return dereference().getStringValue(); } else if (type == STRING || type == NAME || type == KEYWORD) { return (String) value; } // wrong type return null; } /** * Get the value as a text string; i.e., a string encoded in UTF-16BE * or PDFDocEncoding. Simple latin alpha-numeric characters are preserved in * both these encodings. * @return the text string value * @throws IOException */ public String getTextStringValue() throws IOException { return PDFStringUtil.asTextString(getStringValue()); } /** * get the value as a PDFObject[]. If this object is an ARRAY, * will return the array. Otherwise, will return an array * of one element with this object as the element. */ public PDFObject[] getArray() throws IOException { if (type == INDIRECT) { return dereference().getArray(); } else if (type == ARRAY) { PDFObject[] ary = (PDFObject[]) value; return ary; } else { PDFObject[] ary = new PDFObject[1]; ary[0] = this; return ary; } } /** * get the value as a boolean. Will return false if this * object is not a BOOLEAN */ public boolean getBooleanValue() throws IOException { if (type == INDIRECT) { return dereference().getBooleanValue(); } else if (type == BOOLEAN) { return value == Boolean.TRUE; } // wrong type return false; } /** * if this object is an ARRAY, get the PDFObject at some * position in the array. If this is not an ARRAY, returns * null. 
*/ public PDFObject getAt(int idx) throws IOException { if (type == INDIRECT) { return dereference().getAt(idx); } else if (type == ARRAY) { PDFObject[] ary = (PDFObject[]) value; return ary[idx]; } // wrong type return null; } /** * get an Iterator over all the keys in the dictionary. If * this object is not a DICTIONARY or a STREAM, returns an * Iterator over the empty list. */ public Iterator getDictKeys() throws IOException { if (type == INDIRECT) { return dereference().getDictKeys(); } else if (type == DICTIONARY || type == STREAM) { return ((HashMap) value).keySet().iterator(); } // wrong type return new ArrayList().iterator(); } /** * get the dictionary as a HashMap. If this isn't a DICTIONARY * or a STREAM, returns null */ public HashMap getDictionary() throws IOException { if (type == INDIRECT) { return dereference().getDictionary(); } else if (type == DICTIONARY || type == STREAM) { return (HashMap) value; } // wrong type return new HashMap(); } /** * get the value associated with a particular key in the * dictionary. If this isn't a DICTIONARY or a STREAM, * or there is no such key, returns null. */ public PDFObject getDictRef(String key) throws IOException { if (type == INDIRECT) { return dereference().getDictRef(key); } else if (type == DICTIONARY || type == STREAM) { key = key.intern(); HashMap h = (HashMap) value; PDFObject obj = (PDFObject) h.get(key.intern()); return obj; } // wrong type return null; } /** * returns true only if this object is a DICTIONARY or a * STREAM, and the "Type" entry in the dictionary matches a * given value. 
* @param match the expected value for the "Type" key in the * dictionary * @return whether the dictionary is of the expected type */ public boolean isDictType(String match) throws IOException { if (type == INDIRECT) { return dereference().isDictType(match); } else if (type != DICTIONARY && type != STREAM) { return false; } PDFObject obj = getDictRef("Type"); return obj != null && obj.getStringValue().equals(match); } public PDFDecrypter getDecrypter() { // PDFObjects without owners are always created as part of // content instructions. Such objects will never have encryption // applied to them, as the stream they're contained by is the // unit of encryption. So, if someone asks for the decrypter for // one of these in-stream objects, no decryption should // ever be applied. This can be seen with inline images. return owner != null ? owner.getDefaultDecrypter() : IdentityDecrypter.getInstance(); } /** * Set the object identifiers * @param objNum the object number * @param objGen the object generation number */ public void setObjectId(int objNum, int objGen) { assert objNum >= OBJ_NUM_TRAILER; assert objGen >= OBJ_NUM_TRAILER; this.objNum = objNum; this.objGen = objGen; } /** * Get the object number of this object; a negative value indicates that * the object is not numbered, as it's not a top-level object: if the value * is {@link #OBJ_NUM_EMBEDDED}, it is because it's embedded within * another object. If the value is {@link #OBJ_NUM_TRAILER}, it's because * it's an object from the trailer. * @return the object number, if positive */ public int getObjNum() { return objNum; } /** * Get the object generation number of this object; a negative value * indicates that the object is not numbered, as it's not a top-level * object: if the value is {@link #OBJ_NUM_EMBEDDED}, it is because it's * embedded within another object. If the value is {@link * #OBJ_NUM_TRAILER}, it's because it's an object from the trailer. 
* @return the object generation number, if positive */ public int getObjGen() { return objGen; } /** * return a representation of this PDFObject as a String. * Does NOT dereference anything: this is the only method * that allows you to distinguish an INDIRECT PDFObject. */ @Override public String toString() { try { if (type == INDIRECT) { StringBuffer str = new StringBuffer (); str.append("Indirect to #" + ((PDFXref) value).getID()); try { str.append("\n" + dereference().toString()); } catch (Throwable t) { str.append(t.toString()); } return str.toString(); } else if (type == BOOLEAN) { return "Boolean: " + (getBooleanValue() ? "true" : "false"); } else if (type == NUMBER) { return "Number: " + getDoubleValue(); } else if (type == STRING) { return "String: " + getStringValue(); } else if (type == NAME) { return "Name: /" + getStringValue(); } else if (type == ARRAY) { return "Array, length=" + ((PDFObject[]) value).length; } else if (type == DICTIONARY) { StringBuffer sb = new StringBuffer(); PDFObject obj = getDictRef("Type"); if (obj != null) { sb.append(obj.getStringValue()); obj = getDictRef("Subtype"); if (obj == null) { obj = getDictRef("S"); } if (obj != null) { sb.append("/" + obj.getStringValue()); } } else { sb.append("Untyped"); } sb.append(" dictionary. Keys:"); HashMap hm = (HashMap) value; Iterator it = hm.entrySet().iterator(); Map.Entry entry; while (it.hasNext()) { entry = (Map.Entry) it.next(); sb.append("\n " + entry.getKey() + " " + entry.getValue()); } return sb.toString(); } else if (type == STREAM) { byte[] st = getStream(); if (st == null) { return "Broken stream"; } return "Stream: [[" + new String(st, 0, st.length > 30 ? 
30 : st.length) + "]]"; } else if (type == NULL) { return "Null"; } else if (type == KEYWORD) { return "Keyword: " + getStringValue(); /* } else if (type==IMAGE) { StringBuffer sb= new StringBuffer(); java.awt.Image im= (java.awt.Image)stream; sb.append("Image ("+im.getWidth(null)+"x"+im.getHeight(null)+", with keys:"); HashMap hm= (HashMap)value; Iterator it= hm.keySet().iterator(); while(it.hasNext()) { sb.append(" "+(String)it.next()); } return sb.toString();*/ } else { return "Whoops! big error! Unknown type"; } } catch (IOException ioe) { return "Caught an error: " + ioe; } } /** * Make sure that this object is dereferenced. Use the cache of * an indirect object to cache the dereferenced value, if possible. */ public PDFObject dereference() throws IOException { if (type == INDIRECT) { PDFObject obj = null; if (cache != null) { obj = (PDFObject) cache.get(); } if (obj == null || obj.value == null) { if (owner == null) { System.out.println("Bad seed (owner==null)! Object=" + this); } obj = owner.dereference((PDFXref)value, getDecrypter()); cache = new SoftReference(obj); } return obj; } else { // not indirect, no need to dereference return this; } } /** * Identify whether the object is currently an indirect/cross-reference * @return whether currently indirect */ public boolean isIndirect() { return (type == INDIRECT); } /** * Test whether two PDFObject are equal. Objects are equal IFF they * are the same reference OR they are both indirect objects with the * same id and generation number in their xref */ @Override public boolean equals(Object o) { if (super.equals(o)) { // they are the same object return true; } else if (type == INDIRECT && o instanceof PDFObject) { // they are both PDFObjects. Check type and xref. PDFObject obj = (PDFObject) o; if (obj.type == INDIRECT) { PDFXref lXref = (PDFXref) value; PDFXref rXref = (PDFXref) obj.value; return ((lXref.getID() == rXref.getID()) && (lXref.getGeneration() == rXref.getGeneration())); } } return false; } }