All downloads are free. Search and download functionality uses the official Maven repository.

org.jpedal.io.types.Dictionary Maven / Gradle / Ivy

There is a newer version: 20151002
Show newest version
/*
 * ===========================================
 * Java Pdf Extraction Decoding Access Library
 * ===========================================
 *
 * Project Info:  http://www.idrsolutions.com
 * Help section for developers at http://www.idrsolutions.com/support/
 *
 * (C) Copyright 1997-2017 IDRsolutions and Contributors.
 *
 * This file is part of JPedal/JPDF2HTML5
 *
     This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2.1 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA


 *
 * ---------------
 * Dictionary.java
 * ---------------
 */
package org.jpedal.io.types;

import java.util.ArrayList;
import org.jpedal.io.ObjectDecoder;
import static org.jpedal.io.ObjectDecoder.debugFastCode;
import static org.jpedal.io.ObjectDecoder.padding;
import static org.jpedal.io.ObjectDecoder.resolveFully;
import org.jpedal.io.ObjectUtils;
import org.jpedal.io.PdfFileReader;
import org.jpedal.objects.raw.FormObject;
import org.jpedal.objects.raw.ObjectFactory;
import org.jpedal.objects.raw.PdfDictionary;
import org.jpedal.objects.raw.PdfObject;
import org.jpedal.utils.NumberUtils;

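/**
 * Static helpers that parse dictionary values out of a raw PDF byte stream
 * and store the results on the supplied {@link PdfObject}.
 */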
public class Dictionary {

    /**
     * Reads the value found at position {@code i} of {@code raw} and stores it on
     * {@code pdfObject} under {@code PDFkeyInt}.
     *
     * Four cases are handled, in this order:
     * a name value starting with '/' (delegated to {@link #readKey}), an empty object
     * (nothing is stored), a full read via {@link #readDictionaryFromRefOrDirect} when
     * recursion is allowed, and otherwise a read via {@link #readRef}.
     *
     * @param pdfObject object receiving the value
     * @param i position in {@code raw} to start reading from
     * @param raw bytes being parsed
     * @param PDFkeyInt key the value is stored under
     * @param objectReader reader passed on to the delegate methods
     * @return position reached in {@code raw}
     */
    public static int readDictionary(final PdfObject pdfObject, int i, final byte[] raw, final int PDFkeyInt, final PdfFileReader objectReader) {

        //when set, only the top level is wanted so the whole tree is not read
        final boolean topLevelOnly = pdfObject.ignoreRecursion();

        //step forward one byte unless already sitting on '<'
        if (raw[i] != '<') {
            i++;
        }

        final int cursor = StreamReaderUtils.skipSpaces(raw, i);

        //common constant value (ie /ToUnicode /Identity-H)
        if (raw[cursor] == '/') {
            return readKey(pdfObject, cursor + 1, raw, PDFkeyInt);
        }

        //empty object - nothing to store
        if (StreamReaderUtils.isEndObj(raw, cursor)) {
            if (debugFastCode) {
                System.out.println(padding + "Empty object" + new String(raw) + "<<");
            }
            return cursor;
        }

        //the value may sit behind an indirect [ref], which readRef allows for
        if (topLevelOnly) {
            return readRef(pdfObject, cursor, raw, PDFkeyInt, objectReader);
        }

        return readDictionaryFromRefOrDirect(pdfObject, pdfObject.getObjectRefAsString(), cursor, raw, PDFkeyInt, objectReader);
    }

    /**
     * Moves past leading spaces and any '[' at position {@code i}, then hands over to
     * {@link #readDictionaryFromRefOrDirect} using the object's own reference string.
     *
     * @param pdfObject object receiving the value
     * @param i position in {@code raw} to start from
     * @param raw bytes being parsed
     * @param PDFkeyInt key the value is stored under
     * @param objectReader reader passed on to the delegate
     * @return position returned by {@link #readDictionaryFromRefOrDirect}
     */
    static int readRef(final PdfObject pdfObject, int i, final byte[] raw, final int PDFkeyInt, final PdfFileReader objectReader) {

        if (debugFastCode) {
            System.out.println(padding + "1.About to read ref orDirect i=" + i + " char=" + (char) raw[i]);
        }

        //91 is '['
        final int valueStart = StreamReaderUtils.skipSpacesOrOtherCharacter(raw, i, 91);

        return readDictionaryFromRefOrDirect(pdfObject, pdfObject.getObjectRefAsString(), valueStart, raw, PDFkeyInt, objectReader);
    }

    /**
     * Stores a name value (the bytes starting at {@code i}, just after the '/') as a new
     * object on {@code pdfObject} under {@code PDFkeyInt}.
     *
     * The new object receives both the constant form (via {@code setConstant}) and the
     * text form (via {@code setGeneralStringValue}) of the same byte range.
     *
     * @param pdfObject object receiving the new value object
     * @param i position of the first byte of the name in {@code raw}
     * @param raw bytes being parsed
     * @param PDFkeyInt key the value is stored under; also used as the new object's id
     * @return position returned by {@code StreamReaderUtils.skipToEndOfRef}
     */
    static int readKey(final PdfObject pdfObject, int i, final byte[] raw, final int PDFkeyInt) {

        final int nameStart = i;
        final int nameEnd = StreamReaderUtils.skipToEndOfRef(raw, nameStart);
        final int nameLength = nameEnd - nameStart;

        final PdfObject constantObj = ObjectFactory.createObject(PDFkeyInt, pdfObject.getObjectRefAsString(), pdfObject.getObjectType(), pdfObject.getID());
        constantObj.setID(PDFkeyInt);

        //keep the value both as a constant and as text
        constantObj.setConstant(PDFkeyInt, nameStart, nameLength, raw);
        constantObj.setGeneralStringValue(new String(getByteKeyFromStream(nameLength, raw, nameStart)));

        if (debugFastCode) {
            System.out.println(padding + "Set Dictionary as String=" + constantObj.getGeneralStringValue() + "  in " + pdfObject + " to " + constantObj);
        }

        //attach to parent
        pdfObject.setDictionary(PDFkeyInt, constantObj);

        return nameEnd;
    }

    /**
     * Reads a dictionary of key/value pairs for {@code PDFkeyInt} and stores it on
     * {@code pdfObject}.
     *
     * The value at {@code i} is treated as direct if it starts with '<', otherwise as an
     * indirect reference. For a reference (and when recursion is allowed) the referenced
     * object's bytes are loaded through {@code objectReader} and scanned instead of
     * {@code raw}. Unless the dictionary is empty, a new object is created, filled by
     * {@code readKeyPairs} and attached with {@code setDictionary}.
     *
     * @param pdfObject object receiving the dictionary
     * @param i position in {@code raw} to start reading from
     * @param raw bytes being parsed
     * @param objectReader used to load the bytes of a referenced object
     * @param PDFkeyInt key the dictionary is stored under; also used as the new object's id
     * @return for a direct value, the position reached after the pairs; for a reference,
     *         the position taken from {@code readRefFromStream}; {@code raw.length} if the
     *         referenced data could not be loaded
     */
    public static int setDictionaryValue(final PdfObject pdfObject, int i, final byte[] raw, final PdfFileReader objectReader, final int PDFkeyInt) {
        
        //if we only need top level do not read whole tree
        final boolean ignoreRecursion=pdfObject.ignoreRecursion();
        
        //debug dump of the 7 bytes starting at i (reads up to raw[i + 6])
        if(debugFastCode) {
            System.out.println(padding + ">>>Reading Dictionary Pairs i=" + i + ' ' + (char) raw[i] + (char) raw[i + 1] + (char) raw[i + 2] + (char) raw[i + 3] + (char) raw[i + 4] + (char) raw[i + 5] + (char) raw[i + 6]);
        }
        
        //47 is '/'
        i = StreamReaderUtils.skipSpacesOrOtherCharacter(raw, i, 47);
        
        //set data which will be switched below if ref
        byte[] data=raw;
        int j=i;
        
        //get next key to see if indirect (anything not starting with '<' is treated as a ref)
        final boolean isRef=data[j]!='<';
        
        if(isRef){
            
            //values[0]=object number, values[1]=generation, values[2]=new position in raw
            final int[] values = StreamReaderUtils.readRefFromStream(raw, i);
            final int number = values[0];
            final int generation = values[1];
            i = values[2];
            
            //NOTE(review): when ignoreRecursion is true the reference is not loaded, so
            //data stays as raw and j still points at the start of the reference for the
            //scan below - confirm this is intended
            if(!ignoreRecursion){
                
                //read the Dictionary data
                data=objectReader.readObjectAsByteArray(pdfObject, objectReader.isCompressed(number, generation), number, generation);
                
                //allow for data in Linear object not yet loaded
                if(data==null){
                    pdfObject.setFullyResolved(false);
                    
                    if(debugFastCode) {
                        System.out.println(padding + "Data not yet loaded");
                    }
                    
                    return raw.length;
                }
                
                //data already starts at the dictionary
                if(data[0]=='<' && data[1]=='<'){
                    j=0;
                }else{
                    //lose obj at start
                    j=3;
                    
                    //106='j', 98='b', 111='o' (the bytes of "obj" checked in reverse)
                    //NOTE(review): with && the loop ends as soon as any ONE of the three
                    //bytes matches rather than requiring all of "obj" - confirm
                    while(data[j-1]!=106 && data[j-2]!=98 && data[j-3]!=111){
                        
                        if(data[j]=='/'){  //trap for odd case
                            j=0;
                            break;
                        }
                        
                        j++;
                        
                        if(j==data.length){ //some missing obj so catch these
                            j=0;
                            break;
                        }
                    }
                    
                    j=StreamReaderUtils.skipSpaces(data, j);
                    
                    //step over a comment sitting before the dictionary
                    if(data[j]=='%'){
                        j=StreamReaderUtils.skipComment(data, j);
                    }
                }
                
            }
        }
        
        //allow for empty object (ie /Pattern <<>> )
        final int endJ=StreamReaderUtils.skipSpacesOrOtherCharacter(data, j,'<');

        if(data[endJ]=='>'){ //empty object
            j=endJ+1;
        }else{
            
            final PdfObject valueObj= ObjectFactory.createObject(PDFkeyInt, pdfObject.getObjectRefAsString(), pdfObject.getObjectType(), pdfObject.getID());
            valueObj.setID(PDFkeyInt);

            //read pairs (stream in data starting at j)
            j=readKeyPairs(data, j,valueObj);

            //store value
            pdfObject.setDictionary(PDFkeyInt,valueObj);

            if(debugFastCode) {
                System.out.println(padding + "Set Dictionary pairs type in " + pdfObject + " to " + valueObj);
            }
            
        }
        
        //update pointer if direct so at end (if ref already in right place)
        if(!isRef){
            i=j;
            
            //debug dump of the bytes around the final position (reads data[i - 2] to data[i + 2])
            if(debugFastCode) {
                System.out.println(i + ">>>>" + data[i - 2] + ' ' + data[i - 1] + " >" + data[i] + "< " + data[i + 1] + ' ' + data[i + 2]);
            }
        }
        return i;
    }

    /**
     * sets pairs and returns point reached in stream
     */
    private static int readKeyPairs(final byte[] data,  int start, final PdfObject pdfObject) {

        final ArrayList keys=new ArrayList(100);
        final ArrayList values=new ArrayList(100);

        while(true){

            //move cursor to start of text
            start = StreamReaderUtils.skipSpacesOrOtherCharacter(data, start, 60);

            if(data[start]==37){ //allow for comment
                start = StreamReaderUtils.skipComment(data, start);
            }

            if(data[start]==62 || StreamReaderUtils.isEndObj(data,start)) { //exit at end
                break;
            }

            //read key (starts with /)           
            final int tokenStart=start+1;
            start=StreamReaderUtils.skipToEndOfKey(data, tokenStart);
            keys.add(getByteKeyFromStream(start-tokenStart, data, tokenStart));

            //read value
            start=StreamReaderUtils.skipSpaces(data,start);

            int refStart=start;

            if(StreamReaderUtils.isNull(data,start)){
                start += 4;
                values.add(null);
            }else {

                if (data[start]==60 || data[start]=='[' || data[start]=='/') {
                    
                    refStart = start;

                    if (data[start] == '<') {
                        start = ObjectUtils.skipToEndOfObject(start, data);
                    } else if (data[start] == '[') {
                        start=StreamReaderUtils.skipToEndOfArray(data, start);                        
                    } else if (data[start] == '/') {
                        start=StreamReaderUtils.skipToEndOfKey(data, start+1);
                    }
                } else { //its 50 0 R
                        while (data[start] != 'R') {
                            start++;
                        }

                    start++; //roll past R
                }
                
                values.add(getByteKeyFromStream(start - refStart, data, refStart));
            }
        }

        final int size=keys.size();
        final byte[][] returnKeys=new byte[size][];
        final byte[][] returnValues=new byte[size][];

        for(int a=0;a0){
                        if(raw[i]=='<' && raw[i+1]=='<'){
                            i += 2;
                            reflevel++;
                        }else if(raw[i]=='(' ){ //allow for << (>>) >>

                            i++;
                            while(raw[i]!=')' || ObjectUtils.isEscaped(raw, i)) {
                                i++;
                            }

                        }else if(raw[i]=='>' && i+1==raw.length){
                            reflevel=0;
                        }else if(raw[i]=='>' && raw[i+1]=='>'){
                            i += 2;
                            reflevel--;
                        }else {
                            i++;
                        }
                    }
                }else if(raw[i]=='['){

                    i++;
                    int reflevel=1;

                    while(reflevel>0){

                        if(raw[i]=='(' ){ //allow for [[ in stream ie [/Indexed /DeviceRGB 255 (abc[[z

                            i++;
                            while(raw[i]!=')' || ObjectUtils.isEscaped(raw, i)) {
                                i++;
                            }

                        }else if(raw[i]=='[' ){
                            reflevel++;
                        }else if(raw[i]==']'){
                            reflevel--;
                        }

                        i++;
                    }
                    i--;
                }else if(StreamReaderUtils.isNull(raw,i)){ //allow for null
                    i += 4;
                }else{ //must be a ref

                    //assume not object and reset below if wrong
                    status=PdfObject.UNDECODED_REF;

                    while(raw[i]!='R' || raw[i-1]=='e') { //second condition to stop spurious match on DeviceRGB
                        i++;

                        if(i==raw.length) {
                            break;
                        }
                    }
                    i++;

                    if(i>=raw.length) {
                        i = raw.length - 1;
                    }
                }
            }

        valueObj.setStatus(status);
        if(status!=PdfObject.DECODED){

            final int StrLength=i-start;
            final byte[] unresolvedData=new byte[StrLength];
            System.arraycopy(raw, start, unresolvedData, 0, StrLength);

            //check for returns in data if ends with R and correct to space
            if(unresolvedData[StrLength-1]==82){

                for(int jj=0;jj') //move back so loop works
        {
            i--;
        }
        return i;
    }


    /**
     * Reads the value at position {@code i} of {@code raw}, which may be a direct
     * dictionary (starts with '<'), a direct name (starts with '/'), or an indirect
     * reference optionally wrapped in '[' ... ']'.
     *
     * References are followed through {@code objectReader}; if the loaded object is
     * itself an array the loop goes round again on the loaded bytes. Before a dictionary
     * is converted, {@link #handlePairs} gets the chance to read it as plain key/value
     * pairs; it signals that it did so by returning a negative position.
     *
     * @param pdfObject object receiving the value
     * @param objectRef reference string passed on to the methods that create or fill objects
     * @param i position in {@code raw} to start reading from
     * @param raw bytes being parsed
     * @param PDFkeyInt - -1 will store in pdfObject directly, not as separate object
     * @param objectReader used to load the bytes of referenced objects
     * @return position reached; the position of ']' for an empty or all-null array;
     *         {@code raw.length} if referenced data could not be loaded
     */
    public static int readDictionaryFromRefOrDirect(final PdfObject pdfObject, final String objectRef, int i, final byte[] raw, final int PDFkeyInt, final PdfFileReader objectReader) {

        //the outer loop only repeats via the labelled continue below (null followed by a dictionary)
        readDictionaryFromRefOrDirect:
        while (true) {
            
            //91 is '['
            i=StreamReaderUtils.skipSpacesOrOtherCharacter(raw, i, 91);
            
            //step over a comment before the value
            if(raw[i]=='%'){
                i=StreamReaderUtils.skipComment(raw, i);
                i=StreamReaderUtils.skipSpacesOrOtherCharacter(raw, i, 91);
            }
            
            if (raw[i] == 60) { //[<>]

                //60 is '<' - direct dictionary; a negative result means handlePairs already read it
                i = handlePairs(pdfObject, objectRef, i, raw, PDFkeyInt);

                if(i<0) {
                    i=-i;
                }else{
                    i =  DirectDictionaryToObject.convert(pdfObject, objectRef, i, raw, PDFkeyInt,objectReader);
                }

            } else if (raw[i] == 47) { //direct value such as /DeviceGray
                
                i = ObjectUtils.setDirectValue(pdfObject, i, raw, PDFkeyInt);
                
            } else { // ref or [ref]
                
                //j walks over data, which starts as raw and is swapped for each loaded object
                int j = i, ref, generation;
                byte[] data = raw;
                
                while (true) {
                    
                    //allow for [ref] at top level (may be followed by gap
                    j=StreamReaderUtils.skipSpacesOrOtherCharacter(data, j, 91);
            
                    //trap empty arrays ie [ ]
                    //ie 13jun/Factuur 2106010.PDF
                    if (data[j] == ']') {
                        return j;
                    }

                    // trap nulls  as well
                    boolean hasNull = false;
                    int keyStart;
                    int[] values;

                    while (true) {

                        //trap null arrays ie [null null]
                        if (hasNull && data[j] == ']') {
                            return j;
                        }
                        //values[0]=object number, values[1]=generation, values[2]=position (used below)
                        values = StreamReaderUtils.readRefFromStream(data, j);
                        ref = values[0];

                        keyStart = j;
                        j=StreamReaderUtils.skipToEndOfRef(data, j);
                        j=StreamReaderUtils.skipSpaces(data, j);

                        //handle nulls
                        //NOTE(review): 69560 is presumably the number readRefFromStream yields
                        //when it parses the text "null" - confirm against StreamReaderUtils
                        if (ref != 69560 || data[keyStart] != 'n') {
                            break; //not null
                        } else {
                            hasNull = true;
                            if (data[j] == '<') { // /DecodeParms [ null << /K -1 /Columns 1778 >>  ] ignore null and jump down to enclosed Dictionary
                                i = j;
                                continue readDictionaryFromRefOrDirect;

                            }
                        }
                    }

                    generation = values[1];
                    j = values[2];

                    data = objectReader.readObjectAsByteArray(pdfObject, objectReader.isCompressed(ref, generation), ref, generation);
                    
                    //allow for data in Linear object not yet loaded
                    if (data == null) {
                        pdfObject.setFullyResolved(false);
                        
                        return raw.length;
                    }
                    
                    //disregard corrupted data from start of file (bytes begin with %PDF) or a null object
                    if (data != null && data.length > 4 && data[0] == '%' && data[1] == 'P' && data[2] == 'D' && data[3] == 'F') {
                        data = null;
                    }else if(StreamReaderUtils.isNull(data,0)){
                        data=null;
                    }
                    
                    //nothing usable was loaded - leave the loop with data == null
                    if (data == null) {
                        break;
                    }
                    
                    /*
                     * get not indirect and exit if not
                     */
                    int j2 = 0;
                    
                    //allow for [91 0 r]
                    if (data[j2] != '[' && data[0] != '<' && data[1] != '<') {
                        
                        //106='j', 98='b', 111='o' (the bytes of "obj" checked in reverse)
                        //NOTE(review): this scan has no upper bound check on j2 - confirm data always contains a terminator
                        while (j2 < 3 || (j2 > 2 && data[j2 - 1] != 106 && data[j2 - 2] != 98 && data[j2 - 3] != 111)) {
                            
                            //allow for /None as value
                            if (data[j2] == '/') {
                                break;
                            }
                            j2++;
                        }

                        j2=StreamReaderUtils.skipSpaces(data,j2);
                    }
                    
                    //if indirect, round we go again
                    if (data[j2] != 91) {
                        //not an array: the loaded object is read from its start
                        j = 0;
                        break;
                    }else if(data[j2]=='[' && data[j2+1]=='<'){
                        //array directly holding a dictionary: step inside the '['
                        j2++;
                        j=j2;
                        break;
                    }
                    
                    j = j2;
                }
                
                //allow for no data found (ie /PDFdata/baseline_screens/debug/hp_broken_file.pdf)
                if (data != null) {
                    //note handlePairs is given raw and i, while readObj receives the loaded data and j
                    i = handlePairs(pdfObject, objectRef, i, raw, PDFkeyInt);

                    if(i<0) {
                        i=-i;
                    }else{
                        i=readObj(j, data, raw, ref, generation, i, pdfObject, PDFkeyInt, objectReader);
                    }
                }
            }
            
            return i;
        }
    }

    /**
     * Reads the value at {@code i} as plain key/value pairs when the key is one of
     * N, R, D or Dests, the parent is not an AA object, and {@code isDictionaryPairs}
     * confirms the content. The pairs are stored in a new {@link FormObject} attached to
     * {@code pdfObject} under {@code PDFkeyInt}.
     *
     * Original developer note kept from the source: "@zain @bethan - you will need to
     * enable here; do this third".
     *
     * @param pdfObject parent object; its key type decides whether pairs are allowed
     * @param objectRef reference string used to create the {@link FormObject}
     * @param i position in {@code raw} of the value
     * @param raw bytes being parsed
     * @param PDFkeyInt key the value is stored under
     * @return {@code i} unchanged if nothing was read; otherwise the NEGATED position
     *         returned by {@code readKeyPairs}, so callers can tell pairs were consumed
     */
    static int handlePairs(final PdfObject pdfObject, final String objectRef, int i, final byte[] raw, final int PDFkeyInt) {

        //AA is excluded because D can occur in there as a Dictionary
        final boolean parentAllowsPairs = pdfObject.getPDFkeyInt() != PdfDictionary.AA;

        final boolean keyMayHoldPairs = PDFkeyInt == PdfDictionary.N
                || PDFkeyInt == PdfDictionary.R
                || PDFkeyInt == PdfDictionary.D
                || PDFkeyInt == PdfDictionary.Dests;

        if (!(parentAllowsPairs && keyMayHoldPairs && isDictionaryPairs(i, raw))) {
            return i;
        }

        final FormObject pairsObj = new FormObject(objectRef);
        pdfObject.setDictionary(PDFkeyInt, pairsObj);

        //negative value flags to the caller that the pairs were read here
        return -readKeyPairs(raw, i, pairsObj);
    }

    private static int readObj(int j, final byte[] data, final byte[] raw, int ref, int generation, int i, final PdfObject pdfObject, final int PDFkeyInt, final PdfFileReader objectReader) {

        /*
        * get id from stream
        */
        j=StreamReaderUtils.skipSpaces(data, j);

        //check not <0){

                    if(raw[end]=='<'&& raw[end+1]=='<'){
                        level2++;
                        end += 2;
                    }else if(raw[end-1]=='>'&& raw[end]=='>'){
                        level2--;
                        if(level2>0) {
                            end += 2;
                        }
                    }else if(raw[end]=='('){ //scan (strings) as can contain >>

                        end++;
                        while(raw[end]!=')' || ObjectUtils.isEscaped(raw, end)) {
                            end++;
                        }
                    }else {
                        end++;
                    }
                }

                inDictionary=false;

            }else if(raw[end]=='R' ){
                inDictionary=false;
            }else if(isKey && (raw[end]==' ' || raw[end]==13 || raw[end]==10 || raw[end]==9)){
                inDictionary=false;
            }else if(raw[end]=='/'){
                inDictionary=false;
                end--;
            }else if(raw[end]=='>' && raw[end+1]=='>'){
                inDictionary=false;
                end--;
            }else {
                end++;
            }
        }

        //boolean save=debugFastCode;
        Dictionary.readDictionary(pdfObject,i, raw, PDFkeyInt, objectReader);

        //use correct value
        return end;
    }




    static int handleValue(final PdfObject pdfObject, int i, final int PDFkeyInt, int j, final int ref, final int generation, final byte[] data, final PdfFileReader objectReader) {

        final int keyStart;
        int keyLength;
        final int dataLen=data.length;

        if (data[j] == 47) {
            j++; //roll on past /

            keyStart = j;
            keyLength = 0;

            //move cursor to end of text
            while (j' && raw[j+1]=='>'){
                level--;
                
                if(level<0){                    
                    break;
                }
            }else if(level==0 && (raw[j]=='[' || (raw[j]=='/' && raw[j+1]=='T' && raw[j+2]=='y' && raw[j+3]=='p')
                    || (raw[j]=='/' && raw[j+1]=='R' && raw[j+2]=='e' && raw[j+3]=='s' && raw[j+4]=='o' && raw[j+5]=='u')
                    || (raw[j]=='s' && raw[j+1]=='t' && raw[j+2]=='r' && raw[j+3]=='e' && raw[j+4]=='a' && raw[j+5]=='m') || (raw[j]=='(' && raw[j+1]== ')'))){
                    j=length;
                    isPair=false;
            }
        }
       
        return isPair;
    }
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy