All downloads are free. Search and download functionality uses the official Maven repository.

org.jpedal.io.types.RefTable Maven / Gradle / Ivy

/*
 * ===========================================
 * Java Pdf Extraction Decoding Access Library
 * ===========================================
 *
 * Project Info:  http://www.idrsolutions.com
 * Help section for developers at http://www.idrsolutions.com/support/
 *
 * (C) Copyright 1997-2016 IDRsolutions and Contributors.
 *
 * This file is part of JPedal/JPDF2HTML5
 *
     This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2.1 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA


 *
 * ---------------
 * RefTable.java
 * ---------------
 */
package org.jpedal.io.types;

import java.io.IOException;
import org.jpedal.exception.PdfException;
import org.jpedal.io.ObjectDecoder;
import org.jpedal.io.PdfFileReader;
import org.jpedal.io.RandomAccessBuffer;
import org.jpedal.objects.raw.CompressedObject;
import org.jpedal.objects.raw.PageObject;
import org.jpedal.objects.raw.PdfDictionary;
import org.jpedal.objects.raw.PdfObject;
import org.jpedal.utils.LogWriter;
import org.jpedal.utils.NumberUtils;

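/**
 * Reads the cross-reference information of a PDF file: finds the startxref
 * pointer near the end of the file (or the first xref of a linearized file),
 * decides whether the table is a legacy xref table or a compressed stream,
 * and keeps the Info, Encrypt and ID values found while reading.
 */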
public class RefTable {
    
    
    /**Encrypt dictionary taken from the trailer while reading the reference table; may be null*/
    PdfObject encryptObj;
    
    /**holds file ID (first entry of the trailer ID array, stored only when an Encrypt dictionary is present)*/
    private byte[] ID;
    
    /**pattern to look for in objects*/
    static final String pattern= "obj";
    
    /**info object (Info dictionary taken from the trailer)*/
    private PdfObject infoObject;
    
    /**table type not yet determined by isCompressedStream*/
    static final int UNSET=-1;
    /**table type: compressed reference data (ObjStm / XRef keyword seen)*/
    static final int COMPRESSED=1;
    /**table type: legacy table (xref keyword seen)*/
    static final int LEGACY=2;
    
    /**access to the PDF data; set to null by closeFile()*/
    private RandomAccessBuffer pdf_datafile;
    
    /**bytes of the keyword xref, matched when detecting a legacy table*/
    static final byte[] oldPattern = {'x','r','e','f'};
    
    /**end position of the file data, supplied to the constructor*/
    private final long eof;
    
    /**receives the xref positions found and the "reference table invalid" flag*/
    final Offsets offset;
    
    /**
     * Creates a reader for the reference table of one PDF file.
     *
     * @param pdf_datafile access to the PDF data
     * @param eof end position of the file data
     * @param offset store that receives the xref positions found
     */
    public RefTable(final RandomAccessBuffer pdf_datafile, final long eof, final Offsets offset) {
        this.offset = offset;
        this.eof = eof;
        this.pdf_datafile = pdf_datafile;
    }
    
    /**
     * read first start ref from last 1024 bytes
     */
    private int readFirstStartRef() throws PdfException {
        
        //reset flag
        offset.setRefTableInvalid(false);
        
        
        int pointer = -1;
        int i = 1019;
        final StringBuilder startRef = new StringBuilder(10);
        
        /* move to end of file and read last 1024 bytes*/
        final int block=1024;
        byte[] lastBytes = new byte[block];
        long end;
        
        /*
         * set endpoint, losing null chars and anything before EOF
         */
        final int[] EndOfFileMarker={37,37,69,79};
        int valReached=3;
        boolean EOFFound=false;
        try {
            end=eof;
            
            /*
             * lose nulls and other trash from end of file
             */
            final int bufSize=255;
            while(true){
                final byte[] buffer=getBytes(end - bufSize, bufSize);
                
                int offset=0;
                
                for(int ii=bufSize-1;ii>-1;ii--){
                    
                    //see if we can decrement EOF tracker or restart check
                    if(!EOFFound) {
                        valReached = 3;
                    }
                    
                    if(buffer[ii]==EndOfFileMarker[valReached]){
                        valReached--;
                        EOFFound=true;
                    }else {
                        EOFFound = false;
                    }
                    
                    //move to next byte
                    offset--;
                    
                    if(valReached<0) {
                        ii = -1;
                    }
                    
                }
                
                //exit if found values on loop
                if(valReached<0){
                    end -= offset;
                    break;
                }else{
                    end -= bufSize;
                }
                
                //allow for no eof
                if(end<0){
                    end=eof;
                    break;
                }
            }
            
            //end=end+bufSize;
            
            //allow for very small file
            int count=(int)(end - block);
            
            if(count<0){
                count=0;
                final int size=(int)eof;
                lastBytes=new byte[size];
                i=size+3; //force reset below
            }
            
            lastBytes=getBytes(count, lastBytes.length);
            
        } catch (final Exception e) {
            LogWriter.writeLog("Exception " + e + " reading last 1024 bytes");
            
            throw new PdfException( e + " reading last 1024 bytes");
        }
        
        //		for(int ii=0;iifileSize) {
            i = fileSize - 5;
        }
        
        while (i >-1) {
            
            //first check is because startref works as well a startxref !!
            if (((lastBytes[i] == 116 && lastBytes[i + 1] == 120) || (lastBytes[i] == 114 && lastBytes[i + 1] == 116))
                    && (lastBytes[i + 2] == 114)
                    && (lastBytes[i + 3] == 101)
                    && (lastBytes[i + 4] == 102)) {
                break;
            }
            
            
            i--;
            
        }
        
        /*trap buggy files*/
        if(i==-1){
            try {
                closeFile();
            } catch (final IOException e1) {
                LogWriter.writeLog("Exception " + e1 + " closing file");
            }
            throw new PdfException( "No Startxref found in last 1024 bytes ");
        }
        
        i += 5; //allow for word length
        
        //move to start of value ignoring spaces or returns
        while (i < 1024 && (lastBytes[i] == 10 || lastBytes[i] == 32 || lastBytes[i] == 13)) {
            i++;
        }
        
        //move to start of value ignoring spaces or returns
        while ((i < 1024)
                && (lastBytes[i] != 10)
                && (lastBytes[i] != 32)
                && (lastBytes[i] != 13)) {
            startRef.append((char) lastBytes[i]);
            i++;
        }
        
        /*convert xref to string to get pointer*/
        if (startRef.length() > 0) {
            pointer = Integer.parseInt(startRef.toString());
        }
        
        if (pointer == -1){
            LogWriter.writeLog("No Startref found in last 1024 bytes ");
            
            try {
                closeFile();
            } catch (final IOException e1) {
                LogWriter.writeLog("Exception " + e1 + " closing file");
            }
            throw new PdfException( "No Startref found in last 1024 bytes ");
        }
        
        return pointer;
    }
    
    
    
    /**
     * Reads the start of the reference table to see if it is the new 1.5 type
     * or a traditional xref, then reads it with the matching reader.
     *
     * When linearObj is null the position comes from the startxref value near
     * the end of the file. Otherwise the data buffer is scanned from the start
     * for the first "xref" keyword or, for a compressed table, the first
     * "XRef" keyword, in which case the position just after the preceding
     * "endobj" is used.
     *
     * A position of 0, or one at or beyond the end of the file, marks the
     * table as invalid and the file is scanned manually for objects instead.
     *
     * @param linearObj linearization object, or null if the file is not read as linearized
     * @param currentPdfFile reader used to load the root object
     * @param objectReader passed on to the compressed stream reader
     * @return the root object
     * @throws PdfException if the reference data cannot be read
     */
    public final PdfObject readReferenceTable(final PdfObject linearObj, final PdfFileReader currentPdfFile, final ObjectReader objectReader) throws PdfException {

        int pointer = -1;
        final int eof = (int) this.eof;

        boolean islinearizedCompressed = false;

        if (linearObj == null) {
            pointer = readFirstStartRef();
        } else { //find at start of Linearized
            final byte[] data = pdf_datafile.getPdfBuffer();

            final int count = data.length;
            int ptr = 5;
            for (int i = 0; i < count; i++) {

                //track start of this object (needed for compressed)
                if (hasTokenAt(data, i, "endobj")) {
                    ptr = i + 6;
                }

                if (hasTokenAt(data, i, "xref")) {
                    pointer = i;
                    break;
                } else if (hasTokenAt(data, i, "XRef")) {

                    islinearizedCompressed = true;

                    //skip returns and spaces after the previous endobj, staying inside the buffer
                    pointer = ptr;
                    while (pointer < count && (data[pointer] == 10 || data[pointer] == 13 || data[pointer] == 32)) {
                        pointer++;
                    }

                    break;
                }
            }
        }

        offset.addXref(pointer);

        PdfObject rootObj = null;

        if (pointer >= eof || pointer == 0) {

            LogWriter.writeLog("Pointer not if file - trying to manually find startref");

            offset.setRefTableInvalid(true);

            try {
                rootObj = new PageObject(BrokenRefTable.findOffsets(pdf_datafile, offset));
            } catch (Error err) {
                throw new PdfException(err.getMessage() + " attempting to manually scan file for objects");
            }

            currentPdfFile.readObject(rootObj);
            return rootObj;

        } else if (islinearizedCompressed || isCompressedStream(pointer, eof)) {
            return readCompressedStream(rootObj, pointer, currentPdfFile, objectReader, linearObj);
        } else {
            return readLegacyReferenceTable(rootObj, pointer, eof, currentPdfFile);
        }
    }

    /**
     * Tests whether the characters of token appear in data starting at pos,
     * returning false (rather than reading past the end) when they do not fit.
     */
    private static boolean hasTokenAt(final byte[] data, final int pos, final String token) {

        final int len = token.length();
        if (pos < 0 || pos + len > data.length) {
            return false;
        }

        for (int k = 0; k < len; k++) {
            if (data[pos + k] != token.charAt(k)) {
                return false;
            }
        }

        return true;
    }
    
    
    /**
     * read reference table from file so we can locate
     * objects in pdf file and read the trailers
     */
    private PdfObject readLegacyReferenceTable(PdfObject rootObj,int pointer, final int eof, final PdfFileReader currentPdfFile) throws PdfException {
        
        
        int endTable, current = 0; //current object number
        byte[] Bytes  ;
        int bufSize = 1024;
        
        /*read and decode 1 or more trailers*/
        while (true) {
            
            try {
                
                //allow for pointer outside file
                Bytes=Trailer.readTrailer(bufSize, pointer, eof,pdf_datafile);
                
            } catch (final Exception e) {
                
                try {
                    closeFile();
                } catch (final IOException e1) {
                    LogWriter.writeLog("Exception " + e + " closing file "+e1);
                }
                throw new PdfException("Exception " + e + " reading trailer");
            }
            
            if (Bytes == null) //safety catch
            {
                break;
            }
            
            //get trailer
            int i = 0;
            
            final int maxLen=Bytes.length;
            boolean trailerNotFound=true;
            
            //for(int a=0;a<100;a++)
            //	System.out.println((char)Bytes[i+a]);
            while (i >
            int level=0;
            while(true){
                
                if(Bytes[i] == 60 && Bytes[i - 1] == 60){
                    level++;
                    i++;
                }else if(Bytes[i] =='['){
                    i++;
                    while(Bytes[i]!=']'){
                        i++;
                        if(i==Bytes.length) {
                            break;
                        }
                    }
                }else if(Bytes[i] ==62 && Bytes[i - 1] ==62){
                    level--;
                    i++;
                }
                
                if(level==0) {
                    break;
                }
                
                i++;
            }
            
            //handle optional XRefStm
            final int XRefStm=pdfObject.getInt(PdfDictionary.XRefStm);
            
            if(XRefStm!=-1){
                pointer=XRefStm;
            }else{ //usual way
                
                boolean hasRef=true;
                
                i = StreamReaderUtils.skipSpaces(Bytes, i);
                while (Bytes[i] =='%'){
                    while(Bytes[i]!=10){
                        
                        i++;
                    }
                    i++;
                }
                /* fix for /Users/markee/Downloads/oneiderapartnerbrochure_web_1371798737.pdf
                /**/
                
                //look for xref as end of startref
                while (Bytes[i] != 116 && Bytes[i + 1] != 120 &&
                        Bytes[i + 2] != 114 && Bytes[i + 3] != 101 && Bytes[i + 4] != 102){
                    
                    if(Bytes[i]=='o' && Bytes[i+1]=='b' && Bytes[i+2]=='j'){
                        hasRef=false;
                        break;
                    }
                    i++;
                }
                
                if(hasRef){
                    
                    i += 8;
                    //move to start of value ignoring spaces or returns
                    while ((i < maxLen)&& (Bytes[i] == 10 || Bytes[i] == 32 || Bytes[i] == 13)) {
                        i++;
                    }
                    
                    final int s=i;
                    
                    //allow for characters between xref and startref
                    while (i < maxLen && Bytes[i] != 10 && Bytes[i] != 32 && Bytes[i] != 13) {
                        i++;
                    }
                    
                    /*convert xref to string to get pointer*/
                    if (s!=i) {
                        pointer = NumberUtils.parseInt(s, i, Bytes);
                    }
                    
                }
            }
            
            i = StreamReaderUtils.skipSpaces(Bytes, 0);
            
            if (pointer == -1){
                LogWriter.writeLog("No startRef");
                
                /*now read the objects for the trailers*/
            } else if (Bytes[i] == 120 && Bytes[i+1] == 114 && Bytes[i+2] == 101 && Bytes[i+3] == 102) { //make sure starts xref
                
               i = StreamReaderUtils.skipSpaces(Bytes, 5);
                
                current = offset.readXRefs(current, Bytes, endTable, i,eof,pdf_datafile);
                
                /*now process trailer values - only first set of table values for root, encryption and info*/
                if (rootObj==null) {
                    
                    rootObj=pdfObject.getDictionary(PdfDictionary.Root);
                    
                    encryptObj=pdfObject.getDictionary(PdfDictionary.Encrypt);
                    if(encryptObj!=null){
                        
                        final byte[][] IDs=pdfObject.getStringArray(PdfDictionary.ID);
                        if(IDs!=null && this.ID==null) {
                            // only the first encountered ID should be used as a fileID for decryption
                            this.ID = IDs[0];
                        }
                    }
                    
                    infoObject=pdfObject.getDictionary(PdfDictionary.Info);
                    
                }
                
                //make sure first values used if several tables and code for prev
                pointer=pdfObject.getInt(PdfDictionary.Prev);
                
                //see if other trailers
                if (pointer!=-1 && pointer=0){
            try {
                pdf_datafile.seek(start);
                pdf_datafile.read(buffer); //get next chars
            } catch (final IOException e) {
                LogWriter.writeLog("Exception: " + e.getMessage());
            }
        }
        
        return buffer;
    }
    
    /**
     * Closes the underlying data file, if one is still held, and drops the
     * reference to it. Does nothing when the file has already been released.
     *
     * @throws IOException if closing the data file fails
     */
    void closeFile() throws IOException {

        if (pdf_datafile == null) {
            return;
        }

        pdf_datafile.close();
        pdf_datafile = null;
    }
    
    
    /**
     * Tests the bytes from pointer onwards to see if this is a new 1.5 style
     * table (ObjStm or XRef keyword) or a legacy table (xref keyword).
     *
     * The data is read in blocks of up to 50 bytes. Scanning stops as soon as
     * one keyword has been matched far enough, or when the end of the file is
     * reached without a match.
     *
     * @param pointer position in the file to start looking from
     * @param eof end position of the file
     * @return true for a compressed table, false for a legacy table
     * @throws PdfException if neither keyword is found before the end of the file
     */
    private boolean isCompressedStream(int pointer, final int eof) throws PdfException {

        int bufSize = 50, charReached_legacy = 0, charReached_comp1 = 0, charReached_comp2 = 0;

        final int[] objStm = {'O', 'b', 'j', 'S', 't', 'm'};
        final int[] XRef = {'X', 'R', 'e', 'f'};

        int type = UNSET;

        //set once one of the keywords has been matched far enough
        boolean found = false;

        //flag to show if at start of data for check
        boolean firstRead = true;

        while (!found) {

            /* adjust buffer if less than a full block left in file */
            if (pointer + bufSize > eof) {
                bufSize = eof - pointer;
            }

            if (bufSize < 0) {
                bufSize = 50;
            }

            if (pointer < 0) {
                pointer += bufSize;
                continue;
            }

            //nothing left to read - stop so the failure is reported instead of looping forever
            if (pointer >= eof) {
                break;
            }

            final byte[] buffer = getBytes(pointer, bufSize);

            //allow for fact sometimes start of data wrong (pointer lands on "ref" rather than "xref")
            if (firstRead && buffer.length > 2 && buffer[0] == 'r' && buffer[1] == 'e' && buffer[2] == 'f') {
                charReached_legacy = 1;
            }

            firstRead = false; //switch off

            /*look for xref or obj */
            for (int i = 0; i < bufSize; i++) {

                final byte currentByte = buffer[i];

                /* check for xref OR end - reset if not */
                if (currentByte == oldPattern[charReached_legacy] && type != COMPRESSED) {
                    charReached_legacy++;
                    type = LEGACY;
                } else if ((currentByte == objStm[charReached_comp1]) && (charReached_comp1 == 0 || type == COMPRESSED)) {

                    charReached_comp1++;
                    type = COMPRESSED;
                } else if ((currentByte == XRef[charReached_comp2]) && (charReached_comp2 == 0 || type == COMPRESSED)) {

                    charReached_comp2++;
                    type = COMPRESSED;
                } else {

                    charReached_legacy = 0;
                    charReached_comp1 = 0;
                    charReached_comp2 = 0;

                    type = UNSET;
                }

                if (charReached_legacy == 3 || charReached_comp1 == 4 || charReached_comp2 == 3) {
                    found = true;
                    break;
                }
            }

            //update pointer
            if (!found) {
                pointer += bufSize;
            }
        }

        /*
         * throw exception if no match or tell user which type
         */
        if (!found) {
            try {
                closeFile();
            } catch (final IOException e1) {
                LogWriter.writeLog("Exception " + e1 + " closing file");
            }
            throw new PdfException("Exception unable to find ref or obj in trailer");
        }

        return type == COMPRESSED;
    }
    
    /**
     * @return the Info object stored while reading the reference table, or null if none was stored
     */
    public PdfObject getInfoObject() {
        return this.infoObject;
    }
    
    /**
     * @return the Encrypt object stored while reading the reference table, or null if none was stored
     */
    public PdfObject getEncryptionObject() {
        return this.encryptObj;
    }
    
    /**
     * @return the file ID bytes held by this object (the internal array, not a copy), or null if none was stored
     */
    public byte[] getID() {
        return this.ID;
    }
    
    //////////////////////////////////////////////////////////////////////////
    /**
     * Moves the read position of the data file to the given location.
     * A location beyond the end of the file is logged and ignored, and any
     * exception raised while moving is logged rather than thrown.
     *
     * @param pointer new position in the file
     */
    public void movePointer(final long pointer) {
        try {
            //make sure inside file
            final boolean outsideFile = pointer > pdf_datafile.length();
            if (outsideFile) {
                LogWriter.writeLog("Attempting to access ref outside file");
                return;
            }

            pdf_datafile.seek(pointer);
        } catch (final Exception e) {
            LogWriter.writeLog("Exception " + e + " moving pointer to  " + pointer + " in file.");
        }
    }
}






© 2015 - 2024 Weber Informatics LLC | Privacy Policy