All downloads are free. The search and download functionality uses the official Maven repository.

org.jpedal.io.types.ObjectReader Maven / Gradle / Ivy

/*
 * ===========================================
 * Java Pdf Extraction Decoding Access Library
 * ===========================================
 *
 * Project Info:  http://www.idrsolutions.com
 * Help section for developers at http://www.idrsolutions.com/support/
 *
 * (C) Copyright 1997-2017 IDRsolutions and Contributors.
 *
 * This file is part of JPedal/JPDF2HTML5
 *
     This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2.1 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA


 *
 * ---------------
 * ObjectReader.java
 * ---------------
 */
package org.jpedal.io.types;

import java.io.IOException;

import org.jpedal.io.ObjectUtils;
import org.jpedal.io.PdfFileReader;
import org.jpedal.io.RandomAccessBuffer;
import org.jpedal.objects.raw.PdfObject;
import org.jpedal.utils.LogWriter;

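/**
 * Reads the raw bytes of a single object from the underlying PDF data buffer.
 *
 * When a usable size is supplied the bytes are read in one block; otherwise the
 * data is read block by block and scanned for the endobj keyword.
 */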
public class ObjectReader {

    /**
     * When true, readObjectData ignores the supplied size and always uses the
     * scanning reader. Nothing in this class sets it; it is public so that other
     * code can.
     */
    public boolean fileIsBroken;

    /** ASCII bytes of the keyword "endobj". */
    static final byte[] endPattern = {101, 110, 100, 111, 98, 106}; //pattern endobj

    /** ASCII bytes of the key "/Length". */
    public static final byte[] lengthString = {47, 76, 101, 110, 103, 116, 104}; //pattern /Length

    /** ASCII bytes of the keyword "stream". */
    public static final byte[] startStream = {115, 116, 114, 101, 97, 109};

    /** Data source all object bytes are read from. */
    private final RandomAccessBuffer pdf_datafile;

    /** Cache threshold set through setCacheSize; -1 means no threshold has been set. */
    private int newCacheSize = -1;

    /** End-of-file position; block reads in the scanning reader are clipped against it. */
    private final long eof;

    /** Passed to PdfObject.setCache when an object is flagged for caching. */
    final PdfFileReader currentPdfFile;

    /**
     * Creates a reader bound to one data source.
     *
     * @param pdf_datafile   buffer the object bytes are read from
     * @param eof            end-of-file position used to clip block reads
     * @param currentPdfFile file reader handed to objects that get flagged for caching
     */
    public ObjectReader(final RandomAccessBuffer pdf_datafile, final long eof, final PdfFileReader currentPdfFile) {
        this.currentPdfFile = currentPdfFile;
        this.eof = eof;
        this.pdf_datafile = pdf_datafile;
    }

    /**
     * Reads the bytes of one object starting at the current file position.
     *
     * If the size is usable (at least 1), no cache size has been set and the file
     * is not flagged as broken, a single block of {@code bufSize + 6} bytes is read.
     * In every other case the work is delegated to the scanning reader.
     *
     * @param bufSize   expected size of the object in bytes; values below 1 mean unknown
     * @param pdfObject object being read, forwarded to the scanning reader (may be null)
     * @return the bytes read; on a read failure the error is logged and the buffer
     *         is returned as it stands
     */
    public byte[] readObjectData(int bufSize, final PdfObject pdfObject) {

        final boolean sizeCanBeTrusted = bufSize >= 1 && newCacheSize == -1 && !fileIsBroken;

        if (!sizeCanBeTrusted) {
            return readUnsizedObjectData(bufSize, pdfObject);
        }

        //6 extra bytes are read beyond the stated size
        final byte[] block = new byte[bufSize + 6];

        try {
            pdf_datafile.read(block);
        } catch (final IOException ex) {
            LogWriter.writeLog("Unable to fill buffer " + ex);
        }

        return block;
    }

    /**
     * Reads an object whose size is unknown or not trusted, by pulling blocks from
     * the file and scanning them byte by byte for the {@code endobj} keyword.
     *
     * Scanning stops when
     * <ul>
     * <li>{@code endobj} has been matched and no {@code /Length} key was seen, or</li>
     * <li>a {@code /Length} key was seen and more than {@code bufSize} bytes (the value
     * originally passed in) have been scanned, or</li>
     * <li>no bytes are left before the end of file.</li>
     * </ul>
     *
     * Whole blocks are appended, so the returned array can extend beyond the point
     * where scanning stopped.
     *
     * @param bufSize   block size hint; values below 1 are replaced by 128, and the
     *                  value is capped at the cache size when one applies
     * @param pdfObject object being read; when non-null and a cache size has been set,
     *                  it is flagged through {@code setCache} once the {@code stream}
     *                  keyword has been seen and the data read exceeds the cache size.
     *                  May be null, in which case the cache size is ignored
     * @return the bytes read; when no {@code /Length} key was seen the array is first
     *         passed through {@code ObjectUtils.checkEndObject}
     */
    private byte[] readUnsizedObjectData(int bufSize, final PdfObject pdfObject) {

        //cache threshold is only used when there is an object to flag
        final int cacheLimit = (pdfObject != null) ? this.newCacheSize : -1;

        final int rawSize = bufSize;

        if (bufSize < 1) {
            bufSize = 128;
        }

        if (cacheLimit != -1 && bufSize > cacheLimit) {
            bufSize = cacheLimit;
        }

        int endObjMatched = 0, streamMatched = 0, lengthMatched = 0;
        boolean startStreamFound = false;
        boolean reachedCacheLimit = false;
        boolean lengthSet = false; //start false and set to true if we find /Length in metadata

        byte[] dataRead = null;
        int realPos = 0;

        while (true) {

            //everything buffered so far has been scanned - fetch the next block
            if (dataRead == null || realPos >= dataRead.length) {

                /* adjust block size if fewer bytes are left in the file */
                final long pointer = getPointer();

                if (pointer + bufSize > eof) {
                    bufSize = (int) (eof - pointer);
                }

                //trap for odd file with no endobj (also covers a pointer already past eof)
                if (bufSize <= 0) {
                    break;
                }

                bufSize += 6;
                byte[] buffer = new byte[bufSize];
                try {
                    pdf_datafile.read(buffer); //get data
                } catch (final IOException ex) {
                    //NOTE(review): scanning carries on with whatever is in the buffer;
                    //confirm that a failing read cannot leave the file pointer stuck
                    LogWriter.writeLog("Unable to fill buffer " + ex);
                }

                if (dataRead == null) {
                    /*
                     * allow for offset being wrong on first block and hitting part of
                     * endobj - remove it so it does not break later code
                     */
                    buffer = stripLeadingEndObj(buffer);

                    //keep the block size in step with what was actually kept
                    if (buffer.length > 0) {
                        bufSize = buffer.length;
                    }

                    dataRead = buffer;
                } else {
                    dataRead = appendDataBlock(buffer.length, buffer, dataRead);
                }

                //first block can be empty after stripping - go round and read again
                if (realPos >= dataRead.length) {
                    continue;
                }
            }

            final byte currentByte = dataRead[realPos];

            //track progress through endobj
            endObjMatched = advanceMatch(endPattern, endObjMatched, currentByte);

            //look for start of stream - only needed when a cache size applies
            if (!startStreamFound && cacheLimit != -1) {
                streamMatched = advanceMatch(startStream, streamMatched, currentByte);

                if (streamMatched == startStream.length) { //stream start found so log
                    startStreamFound = true;
                }
            }

            //switch on caching, once only, when the data grows past the limit
            //(pdfObject cannot be null here: startStreamFound requires a cache limit)
            if (!reachedCacheLimit && startStreamFound && dataRead.length > cacheLimit) {
                pdfObject.setCache(currentPdfFile);
                reachedCacheLimit = true;
            }

            //also scan for /Length if it had a valid size
            if (!startStreamFound && !lengthSet && rawSize != -1) {
                lengthMatched = advanceMatch(lengthString, lengthMatched, currentByte);

                //the first 6 bytes ("/Lengt") are enough to count as a match
                if (lengthMatched == 6) {
                    lengthSet = true;
                }
            }

            realPos++;

            if (endObjMatched == endPattern.length) {

                //without a /Length the first endobj ends the object
                if (!lengthSet) {
                    break;
                }

                endObjMatched = 0;
            }

            if (lengthSet && realPos > rawSize) {
                break;
            }
        }

        if (!lengthSet) {
            dataRead = ObjectUtils.checkEndObject(dataRead);
        }

        return dataRead;
    }

    /**
     * Removes a leading endobj (or fragment of one) from the first block read.
     *
     * If a complete endobj starts within the first 10 bytes, everything up to and
     * including it is dropped. From that position (or from the start when none was
     * found) any run of the letters e, n, d, o, b, j is dropped as well.
     * All index accesses are bounds checked so short blocks are safe.
     *
     * @param block first block read from the file
     * @return the same array when nothing was removed, otherwise a shortened copy
     */
    private static byte[] stripLeadingEndObj(final byte[] block) {

        int skip = 0;

        //check first 10 start positions, staying inside the block
        for (int start = 0; start < 10 && start + endPattern.length <= block.length; start++) {
            boolean found = true;
            for (int k = 0; k < endPattern.length; k++) {
                if (block[start + k] != endPattern[k]) {
                    found = false;
                    break;
                }
            }
            if (found) {
                skip = start;
                break;
            }
        }

        while (skip < block.length && isEndObjLetter(block[skip])) {
            skip++;
        }

        if (skip == 0) {
            return block;
        }

        final byte[] trimmed = new byte[block.length - skip];
        System.arraycopy(block, skip, trimmed, 0, trimmed.length);
        return trimmed;
    }

    /**
     * Tells whether a byte is one of the letters that make up endobj.
     */
    private static boolean isEndObjLetter(final byte value) {
        return value == 'e' || value == 'n' || value == 'd' || value == 'o' || value == 'b' || value == 'j';
    }

    /**
     * Advances a running keyword match by one byte.
     *
     * On a mismatch the byte is retried as the first byte of the pattern, so that
     * input such as "eendobj" still matches. This is sufficient because the first
     * byte of each pattern used here does not occur again later in that pattern.
     *
     * @param pattern keyword being searched for
     * @param matched number of pattern bytes matched so far; must be below pattern.length
     * @param current byte just read
     * @return the new number of matched bytes
     */
    private static int advanceMatch(final byte[] pattern, final int matched, final byte current) {
        if (current == pattern[matched]) {
            return matched + 1;
        }
        return current == pattern[0] ? 1 : 0;
    }

    /**
     * Returns a new array holding all of {@code dataRead} followed by the first
     * {@code newBytes} bytes of {@code buffer}. Neither input array is modified.
     *
     * @param newBytes number of bytes to take from the start of buffer
     * @param buffer   block supplying the bytes to append
     * @param dataRead bytes accumulated so far
     * @return the combined array
     */
    static byte[] appendDataBlock(final int newBytes, final byte[] buffer, final byte[] dataRead) {

        final int existing = dataRead.length;

        //grown copy keeps the old bytes at the front, zero padded behind
        final byte[] combined = java.util.Arrays.copyOf(dataRead, existing + newBytes);

        //new block goes directly after the old bytes
        System.arraycopy(buffer, 0, combined, existing, newBytes);

        return combined;
    }

    /**
     * Returns the current position in the data file.
     *
     * @return the file pointer, or 0 if it could not be obtained (the failure is logged)
     */
    private long getPointer() {
        try {
            return pdf_datafile.getFilePointer();
        } catch (final Exception e) {
            LogWriter.writeLog("Exception " + e + " getting pointer in file");
            return 0;
        }
    }

    /**
     * Sets the size over which objects are kept on disk.
     *
     * Any value other than -1 also makes readObjectData use the scanning reader.
     *
     * @param miniumumCacheSize new cache threshold; -1 means no threshold
     */
    public void setCacheSize(final int miniumumCacheSize) {
        this.newCacheSize = miniumumCacheSize;
    }
}






© 2015 - 2024 Weber Informatics LLC | Privacy Policy