All downloads are free. Search and download functionality uses the official Maven repository.

org.jpedal.linear.LinearParser Maven / Gradle / Ivy

/*
 * ===========================================
 * Java Pdf Extraction Decoding Access Library
 * ===========================================
 *
 * Project Info:  http://www.idrsolutions.com
 * Help section for developers at http://www.idrsolutions.com/support/
 *
 * (C) Copyright 1997-2016 IDRsolutions and Contributors.
 *
 * This file is part of JPedal/JPDF2HTML5
 *
     This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2.1 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA


 *
 * ---------------
 * LinearParser.java
 * ---------------
 */
package org.jpedal.linear;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.HashMap;
import java.util.Map;
import org.jpedal.FileAccess;
import org.jpedal.exception.PdfException;
import org.jpedal.io.LinearizedHintTable;
import org.jpedal.io.ObjectDecoder;
import org.jpedal.io.PdfFileReader;
import org.jpedal.io.PdfObjectReader;
import org.jpedal.objects.raw.LinearizedObject;
import org.jpedal.objects.raw.PageObject;
import org.jpedal.objects.raw.PdfDictionary;
import org.jpedal.objects.raw.PdfObject;
import org.jpedal.parser.PdfStreamDecoder;
import org.jpedal.utils.LogWriter;
import org.jpedal.utils.NumberUtils;
import org.jpedal.utils.repositories.FastByteArrayOutputStream;

public class LinearParser {
    
    /** Flag showing whether the current file has been tested for linearization; reset for every file. */
    public boolean isLinearizationTested;
    
    /** Page object most recently created from hint-table data or fetched from the cache. */
    private PageObject linObject;
    
    /**
     * Cache of page objects that have been fully resolved, keyed by page number.
     * Typed explicitly so lookups return a PageObject without a cast.
     */
    private final Map<Integer, PageObject> linObjects=new HashMap<Integer, PageObject>();
    
    /** Initialised to -1. NOTE(review): presumably the page count taken from the Linearized object - confirm against its users. */
    private int linearPageCount=-1;
    
    /** Present if the file is Linearized; cleared when the file is closed. */
    private PdfObject linearObj;
    
    /**
     * Holds all the data in the Linearized object; used to look up a page's
     * object reference and its raw object data.
     */
    private LinearizedHintTable linHintTable;
    
    /**
     * Byte count checked while the file is being read; -1 means it is not set.
     * NOTE(review): presumably the /E value of the Linearized dictionary - confirm.
     */
    private int E=-1;
    
    /** Background reader thread; interrupted when the file is closed. */
    public org.jpedal.linear.LinearThread linearizedBackgroundReaderer;
    
    /**
     * Resets the linearization state held by this parser.
     *
     * Clears the cached page objects, the Linearized object and the hint table,
     * resets the tested flag and the byte-count marker, and interrupts the
     * background reader thread if it is still alive.
     *
     * If the calling thread is itself interrupted while waiting for the reader,
     * its interrupt status is restored and the wait is abandoned; the remaining
     * state is still cleared before returning.
     */
    public void closePdfFile() {
        
        E=-1;
        linearObj=null;
        isLinearizationTested =false;
        linObjects.clear();
        if(linearizedBackgroundReaderer!=null && linearizedBackgroundReaderer.isAlive()){
            linearizedBackgroundReaderer.interrupt();
        }
        
        //wait to die
        while(linearizedBackgroundReaderer !=null && linearizedBackgroundReaderer.isAlive() && !linearizedBackgroundReaderer.isInterrupted()){
            try{
                Thread.sleep(500);
            }catch(final InterruptedException e){
                LogWriter.writeLog("Exception: " + e.getMessage());
                
                //sleep() cleared the caller's interrupt flag - restore it so
                //code further up the stack can still see the interruption
                Thread.currentThread().interrupt();
                
                //stop waiting: with the flag set, sleep() would throw again at once
                break;
            }
        }
        
        linHintTable=null;
        
    }
    
    private void testForLinearlized(final byte[] buffer, final PdfObjectReader currentPdfFile) {
        int start=0,end=0;
        boolean isLinear=false;
        
        isLinearizationTested =true;
        
        //scan for Linearized in text
        final int len=buffer.length;
        for(int i=0;i1 && linHintTable!=null){
                
                final Integer key= rawPage;
                
                //cached data
                if(linObjects.containsKey(key)){
                    linObject= linObjects.get(key);
                    
                    return true;
                }
                
                final int objID=linHintTable.getPageObjectRef(rawPage);
                
                //return if Page data not available
                final byte[] pageData=linHintTable.getObjData(objID);
                if(pageData!=null){
                    
                    /*
                     * turn page into obj
                     */
                    linObject=new PageObject(objID+" 0 R");
                    linObject.setStatus(PdfObject.UNDECODED_DIRECT);
                    linObject.setUnresolvedData(pageData, PdfDictionary.Page);
                    linObject.isDataExternal(true);
                    
                    final PdfFileReader objectReader=currentPdfFile.getObjectReader();

                    //see if object and all refs loaded otherwise exit
                    if(!ObjectDecoder.resolveFully(linObject, objectReader)) {
                        isPageAvailable = false;
                    } else{  //cache once available
                        
                        /*
                         * check content as well
                         */
                        if(linObject!=null){
                            
                            final byte[] b_data=currentPdfFile.getObjectReader().readPageIntoStream(linObject);
                            
                            if(b_data==null){
                                isPageAvailable=false;
                            }else{
                                //check Resources
                                final PdfObject Resources=linObject.getDictionary(PdfDictionary.Resources);
                                
                                if(Resources==null){
                                    linObject=null;
                                    isPageAvailable=false;
                                }else if(!ObjectDecoder.resolveFully(Resources, objectReader)){
                                    linObject=null;
                                    isPageAvailable=false;
                                }else{
                                    Resources.isDataExternal(true);
                                    new PdfStreamDecoder(currentPdfFile).readResources(Resources,true);
                                    if(!Resources.isFullyResolved()){
                                        linObject=null;
                                        isPageAvailable=false;
                                    }
                                }
                            }
                        }
                        
                        if(isPageAvailable && linObject!=null){
                            linObjects.put(key,linObject);
                        }
                    }
                }else {
                    isPageAvailable = false;
                }
            }else {
                linObject = null;
            }
            
        }catch(final Exception e){
            LogWriter.writeLog("Exception: " + e.getMessage());
           
            isPageAvailable=false;
        }
        
        return isPageAvailable;
    }
    
    public byte[] readLinearData(final PdfObjectReader currentPdfFile, final File tempURLFile, final InputStream is, final FileAccess fileAccess) throws IOException {
        
        final FileChannel fos = new RandomAccessFile(tempURLFile,"rws").getChannel();
        fos.force(true);
        
        final FastByteArrayOutputStream bos=new FastByteArrayOutputStream(8192);
        
        // Download buffer
        final byte[] buffer = new byte[4096];
        int read,bytesRead=0;
        byte[] b;
        
        //main loop to read all the file bytes (carries on in thread if linearized)
        while ((read = is.read(buffer)) != -1) {
            
            if(read>0){
                synchronized (fos){
                    
                    b=new byte[read];
                    System.arraycopy(buffer,0,b,0,read);
                    final ByteBuffer f=ByteBuffer.wrap(b);
                    fos.write(f);
                }
            }
            
            bytesRead += read;
            
            //see if number of bytes loaded
            if(E!=-1){
                
                bos.write(buffer,0,read);
                
                //once correct number of bytes for Linearized object read, start background thread to read rest and process Linearized/page 1
                if(E57) //if its not a number value it looks suspicious
            {
                contentIsDodgy = true;
            }
            
            i++;
        }
        
        //trap for content not correct
        if(!contentIsDodgy){
            
            final int number= NumberUtils.parseInt(keyStart2, i, hintStream);
            
            //generation
            while(hintStream[i]==10 || hintStream[i]==13 || hintStream[i]==32 || hintStream[i]==47 || hintStream[i]==60) {
                i++;
            }
            
            keyStart2=i;
            //move cursor to end of reference
            while(i<10 && hintStream[i]!=10 && hintStream[i]!=13 && hintStream[i]!=32 && hintStream[i]!=47 && hintStream[i]!=60 && hintStream[i]!=62) {
                i++;
            }
            final int generation= NumberUtils.parseInt(keyStart2, i, hintStream);
            
            while(i




© 2015 - 2024 Weber Informatics LLC | Privacy Policy