All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.jpedal.parser.PdfStreamDecoder Maven / Gradle / Ivy

There is a newer version: 7.15.25
Show newest version
/*
 * ===========================================
 * Java Pdf Extraction Decoding Access Library
 * ===========================================
 *
 * Project Info:  http://www.idrsolutions.com
 * Help section for developers at http://www.idrsolutions.com/support/
 *
 * (C) Copyright 1997-2017 IDRsolutions and Contributors.
 *
 * This file is part of JPedal/JPDF2HTML5
 *
 @LICENSE@
 *
 * ---------------
 * PdfStreamDecoder.java
 * ---------------
 */
package org.jpedal.parser;

import java.awt.Graphics2D;
import java.awt.Rectangle;
import java.awt.Shape;

import org.jpedal.PdfDecoderInt;
import org.jpedal.color.PdfPaint;
import org.jpedal.constants.PageInfo;
import org.jpedal.exception.PdfException;
import org.jpedal.external.*;
import org.jpedal.fonts.PdfFont;
import org.jpedal.fonts.StandardFonts;
import org.jpedal.javafx.JavaFXSupport;
import org.jpedal.fonts.glyph.T3Size;
import org.jpedal.images.SamplingFactory;
import org.jpedal.io.DefaultErrorTracker;
import org.jpedal.io.ObjectDecoder;
import org.jpedal.io.ObjectStore;
import org.jpedal.io.PdfObjectFactory;
import org.jpedal.io.PdfObjectReader;
import org.jpedal.io.StatusBar;
import org.jpedal.objects.*;
import org.jpedal.objects.layers.PdfLayerList;
import org.jpedal.objects.raw.ExtGStateObject;
import org.jpedal.objects.raw.MCObject;
import org.jpedal.objects.raw.PdfDictionary;
import org.jpedal.objects.raw.PdfObject;
import org.jpedal.objects.raw.XObject;
import org.jpedal.parser.color.*;
import org.jpedal.parser.gs.CM;
import org.jpedal.parser.gs.Q;
import org.jpedal.parser.image.DO;
import org.jpedal.parser.image.ID;
import org.jpedal.parser.image.ImageDecoder;
import org.jpedal.parser.shape.*;
import org.jpedal.parser.text.*;
import org.jpedal.render.DynamicVectorRenderer;
import org.jpedal.utils.LogWriter;
import org.jpedal.utils.repositories.Vector_Int;
import org.jpedal.utils.repositories.generic.Vector_Rectangle_Int;

/**
 * Contains the code which 'parses' the commands in the stream and extracts the
 * data (images and text). Users should not need to call it.
 */
public class PdfStreamDecoder extends BaseDecoder {

    private static boolean showFXShadingMessage;

    protected org.jpedal.objects.structuredtext.StructuredContentHandler contentHandler;

    int formLevel;

    private int BDCDepth = -1;

    PdfObjectCache cache;

    PdfPageData pageData;

    ErrorTracker errorTracker;

    PdfObjectReader currentPdfFile;

    protected GraphicsState newGS;

    protected byte[] pageStream;

    PdfLayerList layers;

    protected boolean getSamplingOnly;

    private boolean isTTHintingRequired;

    final Vector_Int textDirections = new Vector_Int();

    final Vector_Rectangle_Int textAreas = new Vector_Rectangle_Int();

    /**
     * shows if t3 glyph uses internal colour or current colour
     */
    public boolean ignoreColors;

    /**
     * images on page
     */
    int imageCount;

    String lastTextValue = "";

    //trap for recursive loop of xform calling itself
    int lastDataPointer = -1;

    private T3Decoder t3Decoder;

    /**
     * flag to show if we REMOVE shapes
     */
    private boolean removeRenderImages;

    //last Trm in case of multiple Tj commands
    private boolean multipleTJs;

    /**
     * flags to show we need colour data as well
     */
    private boolean textColorExtracted;

    /**
     * flag to show text is being extracted
     */
    private boolean textExtracted = true;

    /**
     * flag to show content is being rendered
     */
    private boolean renderText;

    private int tokenNumber;

    /**
     * list of images used for display
     */
    String imagesInFile;

    //set threshold - value indicates several possible values
    public static final float currentThreshold = 0.595f;

    protected ImageHandler customImageHandler;

    private PdfFontFactory pdfFontFactory;

    boolean isXMLExtraction;

    /**
     * internal development flag which should not be used
     */
    //turn on debugging to see commands
    public static final boolean showCommands = false;
//    public static boolean showCommands = true;

    /**
     * interactive display
     */
    private StatusBar statusBar;

    /**
     * store text data and can be passed out to other classes
     */
    final PdfData pdfData = new PdfData();

    /**
     * store image data extracted from pdf
     */
    final PdfImageData pdfImages = new PdfImageData();

    /**
     * used to debug
     */
    protected static String indent = "";

    /**
     * show if possible error in stream data
     */
    protected boolean isDataValid = true;

    /**
     * used to store font information from pdf and font functionality
     */
    private PdfFont currentFontData;

    protected ObjectStore objectStoreStreamRef;

    String formName = "";

    public static boolean useTextPrintingForNonEmbeddedFonts;

    /**
     * allows us to terminate file if looks like might crash JVM due to complexity
     */
    private static int maxShapesAllowed = -1;

    // Used to get the blendmode of an object in PDFObjectToImage
    private int currentBlendMode = PdfDictionary.Normal;

    static {
        SamplingFactory.setDownsampleMode(null);

        /*
         * we have PDFs which crash the JVM, so as a workaround an optional cap on
         * the number of shape commands can be supplied through the system property
         * org.jpedal.maxShapeCount. If the property is not set, maxShapesAllowed
         * keeps its default value of -1.
         */
        final String maxShapes = System.getProperty("org.jpedal.maxShapeCount");
        if (maxShapes != null) {
            try {
                maxShapesAllowed = Integer.parseInt(maxShapes);
            } catch (final NumberFormatException e) {
                //chain the original exception so its stack trace is not lost
                throw new RuntimeException("Your setting (" + maxShapes + ")for org.jpedal.maxShapeCount is not a valid number " + e, e);
            }
        }
    }

    /**
     * Creates a stream decoder for the given file without any layer list.
     *
     * @param currentPdfFile reader for the PDF file being decoded
     */
    public PdfStreamDecoder(final PdfObjectReader currentPdfFile) {
        //a null layer list leaves the layers field unset, exactly as before
        this(currentPdfFile, null);
    }

    /**
     * Creates a new StreamDecoder to create the display, optionally keeping
     * hold of a layer list.
     *
     * @param currentPdfFile reader for the PDF file being decoded
     * @param layers         layer list to store; a null value leaves the field untouched
     */
    public PdfStreamDecoder(final PdfObjectReader currentPdfFile, final PdfLayerList layers) {

        init(currentPdfFile);

        //only overwrite the field when a real list was supplied
        if (layers != null) {
            this.layers = layers;
        }
    }

    /**
     * Set-up shared by the constructors: stores the file reader, creates the
     * per-decoder helper objects and creates the font factory for the file.
     *
     * @param currentPdfFile reader for the PDF file being decoded
     */
    private void init(final PdfObjectReader currentPdfFile) {

        this.currentPdfFile = currentPdfFile;

        //fresh state objects for this decoder
        cache = new PdfObjectCache();
        gs = new GraphicsState();
        errorTracker = new DefaultErrorTracker();
        pageData = new PdfPageData();

        //STD first, then MAC
        StandardFonts.checkLoaded(StandardFonts.STD);
        StandardFonts.checkLoaded(StandardFonts.MAC);

        pdfFontFactory = new PdfFontFactory(this.currentPdfFile);
    }

    /**
     * Reads the content belonging to the supplied object as one byte stream,
     * decodes it with {@link #decodeStreamIntoObjects(byte[], boolean)} and
     * returns any Type3 sizes collected on the way.
     *
     * @param pdfObject object whose content is decoded; if it is null, or it has
     *                  a Contents array, a preset pageStream is used when available
     * @return the T3 sizes (a new T3Size if no T3 decoder was created), or null
     *         if an {@link Error} was trapped and recorded as a page failure
     * @throws PdfException if the page is being rendered but no
     *                      DynamicVectorRenderer has been set up
     */
    public T3Size decodePageContent(final PdfObject pdfObject) throws PdfException {

        try {

            //start each decode from a clean set of flags and counters
            parserOptions.imagesProcessedFully = true;
            parserOptions.tooManyShapes = false;
            imageCount = 0;

            parserOptions.setPdfLayerList(this.layers);

            //also reset here as good point as syncs with font code
            imagesInFile = null;

            if (!parserOptions.renderDirectly() && statusBar != null) {
                statusBar.percentageDone = 0;
            }

            //use the preset graphics state if there is one
            gs = (newGS != null) ? newGS : new GraphicsState(0, 0);

            if (parserOptions.isRenderPage()) {

                //rendering is impossible without a renderer
                if (current == null) {
                    throw new PdfException("DynamicVectorRenderer not setup PdfStreamDecoder setStore(...) should be called");
                }

                current.drawClip(gs, parserOptions.defaultClip, false);

                //paint the background here so a changed background colour is allowed for in extraction modes
                final int pageNum = parserOptions.getPageNumber();
                final Rectangle cropBox = new Rectangle(pageData.getCropBoxX(pageNum), pageData.getCropBoxY(pageNum),
                        pageData.getCropBoxWidth(pageNum), pageData.getCropBoxHeight(pageNum));
                current.writeCustom(DynamicVectorRenderer.PAINT_BACKGROUND, cropBox);
            }

            //an object without a Contents array is read directly as a stream
            boolean noContentsArray = false;
            if (pdfObject != null) {
                noContentsArray = pdfObject.getKeyArray(PdfDictionary.Contents) == null;
                isDataValid = pdfObject.streamMayBeCorrupt();
            }

            //get the binary data from the file
            final byte[] streamData;
            if (noContentsArray) {
                streamData = currentPdfFile.readStream(pdfObject, true, true, false, false, false, pdfObject.getCacheName(currentPdfFile.getObjectReader()));
            } else if (pageStream != null) {
                streamData = pageStream;
            } else {
                streamData = currentPdfFile.getObjectReader().readPageIntoStream(pdfObject);
            }

            //trap for recursive loop of xform calling itself
            lastDataPointer = -1;

            //only decode when there is actually some data
            if (streamData != null && streamData.length > 0) {
                decodeStreamIntoObjects(streamData, false);
            }

            //flush fonts
            if (!parserOptions.isType3Font()) {
                cache.resetFonts();
            }

            //copy out the T3 values and drop the decoder
            final T3Size t3 = new T3Size();
            if (t3Decoder != null) {
                t3.x = t3Decoder.T3maxWidth;
                t3.y = t3Decoder.T3maxHeight;
                ignoreColors = t3Decoder.ignoreColors;
                t3Decoder = null;
            }

            return t3;

        } catch (final Error err) {

            LogWriter.writeLog("Error " + err);

            //errors mentioning "kochi" are passed on when the handler flag asks for it
            if (ExternalHandlers.throwMissingCIDError) {
                final String message = err.getMessage();
                if (message != null && message.contains("kochi")) {
                    throw err;
                }
            }

            errorTracker.addPageFailureMessage("Problem decoding page " + err);

            return null;
        }
    }

    /**
     * Passes an object setting into the decoder. The key selects which setting
     * is changed and obj is cast to the type that setting needs; keys not
     * listed here are ignored.
     *
     * @param key one of the ValueTypes / Options constants handled below
     * @param obj new value for the setting
     */
    public void setObjectValue(final int key, final Object obj) {

        switch (key) {

            case ValueTypes.Name:
                parserOptions.setName((String) obj);
                break;

            case ValueTypes.PDFPageData:
                pageData = (PdfPageData) obj;

                //flag if colour info being extracted
                if (textColorExtracted) {
                    pdfData.enableTextColorDataExtraction();
                }
                break;

            //pass in status bar object
            case ValueTypes.StatusBar:
                this.statusBar = (StatusBar) obj;
                break;

            case ValueTypes.PdfLayerList:
                this.layers = (PdfLayerList) obj;
                break;

            //used internally for structured content extraction
            case ValueTypes.MarkedContent:
                contentHandler = isHTML
                        ? new org.jpedal.objects.structuredtext.HTMLStructuredContentHandler(obj, current)
                        : new org.jpedal.objects.structuredtext.StructuredContentHandler(obj);

                parserOptions.setContentHandler(contentHandler);
                break;

            case Options.GlyphTracker:
                parserOptions.setCustomGlyphTracker((GlyphTracker) obj);
                break;

            case ValueTypes.ImageHandler:
                this.customImageHandler = (ImageHandler) obj;
                passImageHandlerToRenderer();
                break;

            //setup stream decoder to render directly to g2 (used by image extraction)
            case ValueTypes.DirectRendering:
                parserOptions.setRenderDirectly(true);

                if (obj != null) {
                    parserOptions.defaultClip = ((Graphics2D) obj).getClip();
                }
                break;

            //should be called after constructor or other methods may not work
            case ValueTypes.ObjectStore:
                objectStoreStreamRef = (ObjectStore) obj;
                passImageHandlerToRenderer();
                break;

            case Options.ErrorTracker:
                this.errorTracker = (ErrorTracker) obj;
                break;

            case Options.ShapeTracker:
                parserOptions.setCustomShapeTracker((ShapeTracker) obj);
                break;

            default:
                //any other key is ignored
                break;
        }
    }

    /**
     * Hands the custom image handler to the renderer, provided both exist.
     */
    private void passImageHandlerToRenderer() {
        if (customImageHandler != null && current != null) {
            current.writeCustom(DynamicVectorRenderer.CUSTOM_IMAGE_HANDLER, customImageHandler);
        }
    }

    /**
     * flag to show interrupted by user
     */
    boolean isPrinting;

    /**
     * NOT PART OF API. Sets an internal boolean option. The only key acted on
     * is GenerateGlyphOnRender, whose value is handed to the parser options
     * (glyph generated when first rendered rather than when decoded); every
     * other key is ignored. Should not need to be called in general usage.
     *
     * @param key   option to set
     * @param value new value for the option
     */
    public void setBooleanValue(final int key, final boolean value) {

        if (key == GenerateGlyphOnRender) {
            parserOptions.setGenerateGlyphOnRender(value);
        }
    }

    /**/

    /**
     * Used internally to allow for colored streams: sets the given paints on
     * the current graphics state, both on its stroke / non-stroke colour spaces
     * and as its stroke / non-stroke colours.
     *
     * @param strokeCol    paint to use for stroking
     * @param nonstrokeCol paint to use for non-stroking operations
     */
    public void setDefaultColors(final PdfPaint strokeCol, final PdfPaint nonstrokeCol) {

        //update the colour spaces first, then the colours held by the graphics state itself
        gs.strokeColorSpace.setColor(strokeCol);
        gs.nonstrokeColorSpace.setColor(nonstrokeCol);
        gs.setStrokeColor(strokeCol);
        gs.setNonstrokeColor(nonstrokeCol);
    }

    /**
     * Returns one of the objects held by the decoder, chosen by key.
     *
     * @param key constant naming the wanted value (ValueTypes, PdfDictionary,
     *            DecodeStatus or PageInfo constants as listed below)
     * @return the matching object, or null if the key is not recognised
     */
    public Object getObjectValue(final int key) {

        final Object value;

        switch (key) {

            case ValueTypes.PDFData:
                //mark the text data first if widths are being embedded
                if (DecoderOptions.embedWidthData) {
                    pdfData.widthIsEmbedded();
                }
                value = pdfData;
                break;

            case ValueTypes.PDFImages:
                value = pdfImages;
                break;

            case ValueTypes.TextAreas:
                value = textAreas;
                break;

            case ValueTypes.TextDirections:
                value = textDirections;
                break;

            case ValueTypes.DynamicVectorRenderer:
                value = current;
                break;

            case PdfDictionary.Font:
                value = pdfFontFactory.getFontsInFile();
                break;

            case PdfDictionary.Image:
                value = imagesInFile;
                break;

            case DecodeStatus.NonEmbeddedCIDFonts:
                value = pdfFontFactory.getnonEmbeddedCIDFonts();
                break;

            case PageInfo.COLORSPACES:
                value = cache.iterator(PdfObjectCache.ColorspacesUsed);
                break;

            default:
                value = null;
                break;
        }

        return value;
    }

    /**
     * Reads the supplied Resources object into the object cache, optionally
     * resetting the list of fonts recorded for the file first.
     *
     * @param Resources resources object to read
     * @param resetList true to reset the fonts-in-file list before reading; the
     *                  flag is also passed on to the cache
     * @throws PdfException declared for callers; which of the calls below can
     *                      raise it is not visible in this method
     */
    public final void readResources(final PdfObject Resources, final boolean resetList) throws PdfException {

        if (resetList) {
            pdfFontFactory.resetfontsInFile();
        }

        //have the file reader check the object is resolved before the cache reads it
        currentPdfFile.checkResolved(Resources);

        cache.readResources(Resources, resetList, currentPdfFile.getObjectReader());

    }

    /**
     * Decodes the actual 'Postscript' stream into text and images by extracting
     * the commands one at a time and passing each to the handler for its type
     * (text, shape, shading, colour, graphics state, image/XForm or Type3).
     * <p>
     * An Exception raised while handling a single command is logged. Decoding
     * then stops if isDataValid is false or if the shape limit set through
     * org.jpedal.maxShapeCount has been exceeded; otherwise it carries on with
     * the next command. An OutOfMemoryError is recorded as a page failure and
     * decoding carries on.
     * <p>
     * NOTE(review): isDataValid is loaded from streamMayBeCorrupt() elsewhere in
     * this class, so its name may not match its meaning - confirm before relying on it.
     *
     * @param stream     bytes of the content stream
     * @param returnText true if the caller wants lastTextValue returned
     * @return null if stream is null or empty, lastTextValue if returnText is
     *         true, otherwise an empty String
     */
    public String decodeStreamIntoObjects(final byte[] stream, final boolean returnText) {

        //nothing to decode (a null stream is treated like an empty one instead of failing)
        if (stream == null || stream.length == 0) {
            return null;
        }

        //start of Dictionary on Inline image
        int startInlineStream = 0;

        final CommandParser parser = new CommandParser(stream);

        final int streamSize = stream.length;
        int dataPointer = 0;
        int startCommand = 0;
        int shapeCommandCount = 0;

        //set once the org.jpedal.maxShapeCount limit is passed so that decoding really stops
        boolean shapeLimitExceeded = false;

        PdfShape currentDrawShape = null;

        if (parserOptions.useJavaFX()) {
            final JavaFXSupport fxSupport = ExternalHandlers.getFXHandler();
            if (fxSupport != null) {
                currentDrawShape = fxSupport.getFXShape();
            }

        } else {
            currentDrawShape = new SwingShape();
        }

        //setup textDecoder
        final Tj textDecoder;
        if (parserOptions.hasContentHandler()) {
            textDecoder = new Tj(parserOptions, textAreas, textDirections, current, errorTracker);
        } else {
            textDecoder = new Tj(parserOptions, pdfData, isXMLExtraction, textAreas, textDirections, current, errorTracker);
            textDecoder.setReturnText(returnText);
        }
        textDecoder.setStreamType(streamType);

        if (statusBar != null && !parserOptions.renderDirectly()) {
            statusBar.percentageDone = 0;
            statusBar.resetStatus("stream");
        }

        /*
         * loop to read stream and decode
         */
        while (true) {

            //allow user to request exit and fail page
            if (errorTracker.checkForExitRequest(dataPointer, streamSize)) {
                break;
            }

            if (statusBar != null && !parserOptions.renderDirectly()) {
                //multiply as long: 90 * dataPointer overflows int once the stream passes roughly 23MB
                statusBar.percentageDone = (int) ((90L * dataPointer) / streamSize);
            }

            dataPointer = parser.getCommandValues(dataPointer, tokenNumber);
            final int commandID = parser.getCommandID();

            //use negative flag to show commands found
            if (dataPointer < 0) {

                dataPointer = -dataPointer;
                try {

                    /*
                     * call method to handle commands
                     */
                    final int commandType = Cmd.getCommandType(commandID);

                    /*text commands first and all other
                     * commands if not found in first
                     **/
                    switch (commandType) {

                        case Cmd.TEXT_COMMAND:

                            //EMC and BDC are processed even when the current layer is not visible
                            if ((commandID == Cmd.EMC || commandID == Cmd.BDC || parserOptions.isLayerVisible()) && !getSamplingOnly && (renderText || textExtracted)) {

                                dataPointer = processTextToken(textDecoder, parser, commandID, startCommand, dataPointer);

                            }
                            break;

                        case Cmd.SHAPE_COMMAND:

                            if (!getSamplingOnly) {
                                processShapeCommands(parser, currentDrawShape, commandID);

                                shapeCommandCount++;

                                if (maxShapesAllowed > 0 && shapeCommandCount > maxShapesAllowed) {

                                    final String errMessage = "[PDF] Shapes on page exceed limit set by JVM flag org.jpedal.maxShapeCount - value " + maxShapesAllowed;

                                    parserOptions.tooManyShapes = true;
                                    shapeLimitExceeded = true;
                                    throw new PdfException(errMessage);
                                }
                            }

                            break;

                        case Cmd.SHADING_COMMAND:

                            // Internal tests can disable images to speed up conversion
                            if (System.getProperty("testsDisableImages") != null) {
                                break;
                            }

                            if (!getSamplingOnly && parserOptions.isRenderPage()) {

                                if (parserOptions.useJavaFX) {
                                    //only report the missing JavaFX support once
                                    if (!showFXShadingMessage) {
                                        System.out.println("SH not implemented in JavaFX yet");

                                        showFXShadingMessage = true;
                                    }
                                } else {
                                    SH.execute(parser.generateOpAsString(0, true), cache, gs,
                                            isPrinting, parserOptions.getPageNumber(), currentPdfFile,
                                            pageData, current);
                                }
                            }

                            break;

                        case Cmd.COLOR_COMMAND:

                            if (!getSamplingOnly) {
                                processColor(parser, commandID);
                            }

                            break;

                        case Cmd.GS_COMMAND:

                            processGScommands(parser, commandID);

                            //may have changed so read back and reset
                            if (commandID == Cmd.cm) {
                                multipleTJs = false;
                            }

                            break;

                        case Cmd.IMAGE_COMMAND:

                            if (commandID == Cmd.BI) {
                                //remember where the inline image dictionary starts; it is used by the later inline-image command
                                startInlineStream = dataPointer;
                            } else {

                                PdfObject xObject = null;
                                int subtype = 1;
                                if (commandID == Cmd.Do) {

                                    final String name = parser.generateOpAsString(0, true);

                                    final byte[] XObjectData = cache.getXObjects(name);
                                    if (XObjectData != null) {

                                        xObject = PdfObjectFactory.getPDFObjectObjectFromRefOrDirect(new XObject("1 0 R"), currentPdfFile.getObjectReader(), XObjectData, PdfDictionary.XObject);

                                        subtype = xObject.getParameterConstant(PdfDictionary.Subtype);
                                    }

                                    if (subtype == PdfDictionary.Form) {

                                        //skip deeply nested forms stuck on the same pointer (catch for odd files like 11jun/results.pdf)
                                        if (!(formLevel > 100 && dataPointer == lastDataPointer)) {
                                            lastDataPointer = dataPointer;

                                            if (xObject != null && parserOptions.isLayerVisible() && (layers == null || layers.isVisible(xObject))) {
                                                XFormDecoder.processXForm(this, dataPointer, xObject, parserOptions.defaultClip, parser);
                                            }

                                            //NOTE: turning the XObject back into a ref here to save memory when lots of
                                            //objects are in play TURNS OUT TO BE A BAD IDEA - it breaks
                                            //[1719] P012-209_001 Projektplan-Projekt-Plan Nord.pdf
                                        }
                                    }
                                }

                                if (subtype != PdfDictionary.Form) {

                                    final ImageDecoder imageDecoder;

                                    if (commandID != Cmd.Do) {
                                        imageDecoder = new ID(imageCount, currentPdfFile, errorTracker, customImageHandler, objectStoreStreamRef, pdfImages, pageData, imagesInFile);
                                    } else {
                                        imageDecoder = new DO(imageCount, currentPdfFile, errorTracker, customImageHandler, objectStoreStreamRef, pdfImages, pageData, imagesInFile);
                                    }

                                    imageDecoder.setRes(cache);
                                    imageDecoder.setGS(gs);
                                    imageDecoder.setSamplingOnly(getSamplingOnly);
                                    imageDecoder.setStreamType(streamType);
                                    imageDecoder.setMultiplyer(multiplyer);
                                    imageDecoder.setRenderer(current);

                                    parserOptions.isPrinting(isPrinting);
                                    imageDecoder.setParams(parserOptions);

                                    if (commandID == Cmd.Do) {

                                        //the CTM size test removes odd lines in abacus file abacus/EP_Print_Post_Suisse_ID_120824.pdf
                                        if (xObject != null && parserOptions.isLayerVisible() && (layers == null || layers.isVisible(xObject))
                                                && !(gs.CTM != null && gs.CTM[1][1] == 0 && gs.CTM[1][0] != 0 && Math.abs(gs.CTM[1][0]) < 0.2)) {

                                            String name = parser.generateOpAsString(0, true);
                                            //name is not unique if in form so we add form level to separate out
                                            if (formLevel > 0) {
                                                name = formName + '_' + formLevel + '_' + name;
                                            }
                                            dataPointer = imageDecoder.processImage(name, dataPointer, xObject);
                                        }
                                    } else if (parserOptions.isLayerVisible()) {
                                        dataPointer = imageDecoder.processImage(dataPointer, startInlineStream, parser.getStream(), tokenNumber);
                                    }

                                    imageCount++;

                                    imagesInFile = imageDecoder.getImagesInFile();

                                }
                            }
                            break;

                        case Cmd.T3_COMMAND:

                            if (!getSamplingOnly && (renderText || textExtracted)) {

                                //created on first use
                                if (t3Decoder == null) {
                                    t3Decoder = new T3Decoder();
                                }

                                t3Decoder.setCommands(parser);
                                t3Decoder.processToken(commandID);

                            }
                            break;
                    }
                } catch (final Exception e) {

                    LogWriter.writeLog("[PDF] " + e + " Processing token >" + Cmd.getCommandAsString(commandID) + "<>" + parserOptions.getFileName() + " <" + parserOptions.getPageNumber());

                    //only exit if no issue with stream, or if the shape limit has been passed
                    //(without the second test the limit would be logged but never stop the decode)
                    if (!isDataValid || shapeLimitExceeded) {
                        dataPointer = streamSize;
                    }

                } catch (final OutOfMemoryError ee) {
                    errorTracker.addPageFailureMessage("Memory error decoding token stream " + ee);

                    LogWriter.writeLog("[MEMORY] Memory error - trying to recover");
                }

                //save for next command
                startCommand = dataPointer;

                //reset array of trailing values
                parser.reset();

                //increase pointer
                incrementTokenNumber();
            }

            //break at end
            if (streamSize <= dataPointer) {
                break;
            }
        }

        if (!parserOptions.renderDirectly() && statusBar != null) {
            statusBar.percentageDone = 100;
        }

        //pick up TextDecoder values
        isTTHintingRequired = textDecoder.isTTHintingRequired();

        return returnText ? lastTextValue : "";
    }

    /**
     * Handles a colour command. For every command other than SCN, scn, SC and
     * sc the renderer is first told to reset its colorspace; the command is
     * then passed to the matching executor. In each pair of operators the
     * lower-case one passes true to the executor and the upper-case one false.
     *
     * @param parser    parser holding the operands of the current command
     * @param commandID Cmd constant for the colour command
     */
    void processColor(final CommandParser parser, final int commandID) {

        final boolean isSetColorCommand = commandID == Cmd.SCN || commandID == Cmd.scn
                || commandID == Cmd.SC || commandID == Cmd.sc;

        if (!isSetColorCommand) {
            current.writeCustom(DynamicVectorRenderer.RESET_COLORSPACE, null);
        }

        switch (commandID) {

            case Cmd.cs:
            case Cmd.CS:
                CS.execute(commandID == Cmd.cs, parser.generateOpAsString(0, true), gs, cache, currentPdfFile, isPrinting);
                break;

            case Cmd.rg:
            case Cmd.RG:
                RG.execute(commandID == Cmd.rg, gs, parser, cache);
                break;

            //SC shares the SCN executor
            case Cmd.SCN:
            case Cmd.SC:
                SCN.execute(false, gs, parser, cache);
                break;

            case Cmd.scn:
            case Cmd.sc:
                SCN.execute(true, gs, parser, cache);
                break;

            case Cmd.g:
            case Cmd.G:
                G.execute(commandID == Cmd.g, gs, parser, cache);
                break;

            case Cmd.k:
            case Cmd.K:
                K.execute(commandID == Cmd.k, gs, parser, cache);
                break;

        }
    }

    /**
     * Handles the graphics state commands cm, q, Q and gs; any other command
     * ID does nothing. The gs command is skipped when only sampling is wanted.
     *
     * @param parser    parser holding the operands of the current command
     * @param commandID Cmd constant for the graphics state command
     */
    private void processGScommands(final CommandParser parser, final int commandID) {

        if (commandID == Cmd.cm) {

            CM.execute(gs, parser);

        } else if (commandID == Cmd.q || commandID == Cmd.Q) {

            //q passes true, Q passes false
            gs = Q.execute(gs, commandID == Cmd.q, graphicsStates, current);

        } else if (commandID == Cmd.gs && !getSamplingOnly) {

            //look up the named entry and turn its data into an object
            final String stateName = parser.generateOpAsString(0, true);
            final byte[] stateData = cache.GraphicsStates.get(stateName);
            final PdfObject extState = getExtStateObjectFromRefOrDirect(currentPdfFile, stateData);

            gs.setMode(extState);

            //pass the resulting alpha values and blend mode on to the renderer
            final int mode = gs.getBMValue();
            current.setGraphicsState(GraphicsState.FILL, gs.getAlpha(GraphicsState.FILL), mode);
            current.setGraphicsState(GraphicsState.STROKE, gs.getAlpha(GraphicsState.STROKE), mode);

            currentBlendMode = mode;
        }

    }

    private void processShapeCommands(final CommandParser parser, final PdfShape currentDrawShape, final int commandID) {

        switch (commandID) {

            case Cmd.B:
                if (!removeRenderImages) {
                    final Shape currentShape = B.execute(false, false, gs, formLevel, currentDrawShape, current, parserOptions);
                    //track for user if required
                    if (currentShape != null) {
                        final ShapeTracker customShapeTracker = parserOptions.getCustomShapeTraker();
                        if (customShapeTracker != null) {
                            customShapeTracker.addShape(tokenNumber, Cmd.B, currentShape, gs.nonstrokeColorSpace.getColor(), gs.strokeColorSpace.getColor());
                        }
                    }
                }
                break;

            case Cmd.b:
                if (!removeRenderImages) {
                    final Shape currentShape = B.execute(false, true, gs, formLevel, currentDrawShape, current, parserOptions);
                    //track for user if required
                    if (currentShape != null) {
                        final ShapeTracker customShapeTracker = parserOptions.getCustomShapeTraker();
                        if (customShapeTracker != null) {
                            customShapeTracker.addShape(tokenNumber, Cmd.b, currentShape, gs.nonstrokeColorSpace.getColor(), gs.strokeColorSpace.getColor());
                        }
                    }
                }
                break;

            case Cmd.bstar:
                if (!removeRenderImages) {
                    final Shape currentShape = B.execute(true, true, gs, formLevel, currentDrawShape, current, parserOptions);
                    //track for user if required
                    if (currentShape != null) {
                        final ShapeTracker customShapeTracker = parserOptions.getCustomShapeTraker();
                        if (customShapeTracker != null) {
                            customShapeTracker.addShape(tokenNumber, Cmd.bstar, currentShape, gs.nonstrokeColorSpace.getColor(), gs.strokeColorSpace.getColor());
                        }
                    }
                }
                break;

            case Cmd.Bstar:
                if (!removeRenderImages) {
                    final Shape currentShape = B.execute(true, false, gs, formLevel, currentDrawShape, current, parserOptions);
                    //track for user if required
                    if (currentShape != null) {
                        final ShapeTracker customShapeTracker = parserOptions.getCustomShapeTraker();
                        if (customShapeTracker != null) {
                            customShapeTracker.addShape(tokenNumber, Cmd.Bstar, currentShape, gs.nonstrokeColorSpace.getColor(), gs.strokeColorSpace.getColor());
                        }
                    }
                }
                break;

            case Cmd.c:
                final float x3 = parser.parseFloat(1);
                final float y3 = parser.parseFloat(0);
                final float x2 = parser.parseFloat(3);
                final float y2 = parser.parseFloat(2);
                final float x = parser.parseFloat(5);
                final float y = parser.parseFloat(4);
                currentDrawShape.addBezierCurveC(x, y, x2, y2, x3, y3);
                break;

            case Cmd.d:
                D.execute(parser, gs);
                break;

            case Cmd.F:

                if (!removeRenderImages) {
                    F.execute(tokenNumber, false, formLevel, currentDrawShape, gs, cache, currentPdfFile, current, parserOptions, multiplyer);
                }
                break;

            case Cmd.f:
                if (!removeRenderImages) {
                    F.execute(tokenNumber, false, formLevel, currentDrawShape, gs, cache, currentPdfFile, current, parserOptions, multiplyer);
                }
                break;

            case Cmd.Fstar:
                if (!removeRenderImages) {
                    F.execute(tokenNumber, true, formLevel, currentDrawShape, gs, cache, currentPdfFile, current, parserOptions, multiplyer);
                }
                break;

            case Cmd.fstar:
                if (!removeRenderImages) {
                    F.execute(tokenNumber, true, formLevel, currentDrawShape, gs, cache, currentPdfFile, current, parserOptions, multiplyer);
                }
                break;

            case Cmd.h:
                currentDrawShape.closeShape();
                break;

            //case Cmd.i:
            //  I();
            //break;
            case Cmd.J:
                J.execute(false, parser.parseInt(), gs);
                break;

            case Cmd.j:
                J.execute(true, parser.parseInt(), gs);
                break;

            case Cmd.l:
                currentDrawShape.lineTo(parser.parseFloat(1), parser.parseFloat(0));
                break;

            case Cmd.M:
                gs.setMitreLimit((int) (parser.parseFloat(0)));
                break;

            case Cmd.m:
                currentDrawShape.setClip(false);
                currentDrawShape.moveTo(parser.parseFloat(1), parser.parseFloat(0));
                break;

            case Cmd.n:
                N.execute(currentDrawShape, gs, formLevel, parserOptions.defaultClip, parserOptions, current, pageData);
                break;

            case Cmd.re:
                currentDrawShape.appendRectangle(parser.parseFloat(3), parser.parseFloat(2), parser.parseFloat(1), parser.parseFloat(0));
                break;

            case Cmd.S:
                if (!removeRenderImages) {

                    final Shape currentShape = S.execute(false, gs, currentDrawShape, current, parserOptions);

                    if (currentShape != null) {
                        final ShapeTracker customShapeTracker = parserOptions.getCustomShapeTraker();
                        if (customShapeTracker != null) {
                            customShapeTracker.addShape(tokenNumber, Cmd.S, currentShape, gs.nonstrokeColorSpace.getColor(), gs.strokeColorSpace.getColor());
                        }
                    }

                }
                break;
            case Cmd.s:
                if (!removeRenderImages) {
                    final Shape currentShape = S.execute(true, gs, currentDrawShape, current, parserOptions);
                    //track for user if required
                    if (currentShape != null) {
                        final ShapeTracker customShapeTracker = parserOptions.getCustomShapeTraker();
                        if (customShapeTracker != null) {
                            customShapeTracker.addShape(tokenNumber, Cmd.s, currentShape, gs.nonstrokeColorSpace.getColor(), gs.strokeColorSpace.getColor());
                        }
                    }
                }
                break;

            case Cmd.v:
                currentDrawShape.addBezierCurveV(parser.parseFloat(3), parser.parseFloat(2), parser.parseFloat(1), parser.parseFloat(0));
                break;

            case Cmd.w:
                gs.setLineWidth(parser.parseFloat(0));
                break;

            case Cmd.Wstar: //set Winding rule
                currentDrawShape.setEVENODDWindingRule();
                currentDrawShape.setClip(true);
                break;

            case Cmd.W:
                currentDrawShape.setNONZEROWindingRule();
                currentDrawShape.setClip(true);
                break;

            case Cmd.y:
                currentDrawShape.addBezierCurveY(parser.parseFloat(3), parser.parseFloat(2), parser.parseFloat(1), parser.parseFloat(0));
                break;
        }
    }

    /**
     * Returns the boolean flag identified by the given key.
     *
     * @param key one of the ValueTypes or DecodeStatus constants handled below
     * @return current value of the requested flag
     * @throws RuntimeException if the key is not one of the supported constants
     */
    public boolean getBooleanValue(final int key) {

        if (key == ValueTypes.EmbeddedFonts) {
            return pdfFontFactory.hasEmbeddedFonts();
        }

        if (key == ValueTypes.StructuredContent) {
            //no handler means no structured content
            return contentHandler != null && contentHandler.hasContent();
        }

        if (key == DecodeStatus.PageDecodingSuccessful) {
            return errorTracker.ispageSuccessful();
        }

        if (key == DecodeStatus.NonEmbeddedCIDFonts) {
            return pdfFontFactory.hasNonEmbeddedCIDFonts();
        }

        if (key == DecodeStatus.ImagesProcessed) {
            return parserOptions.imagesProcessedFully;
        }

        if (key == DecodeStatus.TooManyShapes) {
            return parserOptions.tooManyShapes;
        }

        if (key == DecodeStatus.YCCKImages) {
            return parserOptions.hasYCCKimages;
        }

        if (key == DecodeStatus.TTHintingRequired) {
            return isTTHintingRequired;
        }

        throw new RuntimeException("Unknown value " + key);
    }

    /**
     * Releases the page data held by this decoder, if there is any.
     */
    public void dispose() {

        if (pdfData == null) {
            return;
        }

        pdfData.dispose();
    }

    /**
     * Stores an integer setting on the parser options.
     * Keys other than ValueTypes.PageNum and ValueTypes.TextPrint are ignored.
     *
     * @param key   ValueTypes constant selecting the setting
     * @param value new value for the setting
     */
    public void setIntValue(final int key, final int value) {

        if (key == ValueTypes.PageNum) {
            parserOptions.setPageNumber(value);
        } else if (key == ValueTypes.TextPrint) {
            //tells program to try and use Java's font printing if possible
            //as work around for issue with PCL printing
            parserOptions.setTextPrint(value);
        }
    }

    /**
     * Stores the XML-extraction flag on this decoder.
     *
     * @param isXMLExtraction new value of the flag
     */
    public void setXMLExtraction(final boolean isXMLExtraction) {
        this.isXMLExtraction = isXMLExtraction;
    }

    /**
     * Initialises the parser options and derives this decoder's own flags from
     * the render and extraction bit masks.
     *
     * @param isPageContent  forwarded to parserOptions.init
     * @param renderPage     forwarded to parserOptions.init; also gates renderText and removeRenderImages
     * @param renderMode     bit mask tested against PdfDecoderInt.RENDERTEXT and PdfDecoderInt.REMOVE_RENDERSHAPES
     * @param extractionMode bit mask tested against PdfDecoderInt.TEXT and PdfDecoderInt.TEXTCOLOR
     * @param isPrinting     forwarded to parserOptions.init
     * @param useJavaFX      forwarded to parserOptions.init
     */
    public void setParameters(final boolean isPageContent, final boolean renderPage, final int renderMode, final int extractionMode, final boolean isPrinting, final boolean useJavaFX) {

        parserOptions.init(isPageContent, renderPage, renderMode, extractionMode, isPrinting, useJavaFX);

        //render-mode bits only count when the page is actually being rendered
        final boolean textRenderingRequested = (renderMode & PdfDecoderInt.RENDERTEXT) == PdfDecoderInt.RENDERTEXT;
        final boolean shapeRemovalRequested = (renderMode & PdfDecoderInt.REMOVE_RENDERSHAPES) == PdfDecoderInt.REMOVE_RENDERSHAPES;

        renderText = renderPage && textRenderingRequested;
        removeRenderImages = renderPage && shapeRemovalRequested;

        //extraction-mode bits are taken as they are
        textExtracted = (extractionMode & PdfDecoderInt.TEXT) == PdfDecoderInt.TEXT;
        textColorExtracted = (extractionMode & PdfDecoderInt.TEXTCOLOR) == PdfDecoderInt.TEXTCOLOR;
    }

    /**
     * Stores the form level used by this decoder.
     *
     * @param value new form level
     */
    public void setFormLevel(final int value) {
        this.formLevel = value;
    }

    /**
     * Handles one text or marked-content command from the content stream.
     * <p>
     * Before dispatching on the command, the current graphics state is handed to the
     * text decoder. For BT (when parserOptions.isRenderPage() is true) the clip and the
     * FILL text-render mode are written to the renderer. For the text-showing commands
     * (Tj, TJ, quote and doubleQuote) {@code currentFontData} is refreshed if the text
     * state reports a font change, and a placeholder font is created if none is set.
     *
     * @param textDecoder  decoder that receives the text-showing commands
     * @param parser       gives access to the operands and to the raw stream bytes
     * @param commandID    Cmd constant identifying the command
     * @param startCommand stream index handed to the text decoder as the start of the command data
     * @param dataPointer  stream index handed to the text decoder together with startCommand
     * @return the dataPointer value that was passed in, unchanged
     */
    private int processTextToken(final Tj textDecoder, final CommandParser parser, final int commandID, int startCommand, final int dataPointer) {

        textDecoder.setGS(gs);

        final TextState currentTextState = gs.getTextState();

        if (commandID == Cmd.BT && parserOptions.isRenderPage()) {
            //save for later and set TR
            current.drawClip(gs, parserOptions.defaultClip, true);
            current.drawTR(GraphicsState.FILL);

        }

        //make sure a usable font is in place before any text is shown
        if (commandID == Cmd.Tj || commandID == Cmd.TJ || commandID == Cmd.quote || commandID == Cmd.doubleQuote) {

            if (currentTextState.hasFontChanged() && currentTextState.getTfs() != 0) { //avoid text which does not appear as zero size

                //switch to correct font
                final String fontID = currentTextState.getFontID();
                final PdfFont restoredFont = FontResolver.resolveFont(gs, this, fontID, pdfFontFactory, cache);
                //keep the previous font if the lookup returned nothing
                if (restoredFont != null) {
                    currentFontData = restoredFont;
                }
            }

            //no font available at all, so fall back to a newly created PdfFont
            if (currentFontData == null) {
                currentFontData = new PdfFont(currentPdfFile);

                //use name for poss mappings (ie Helv)
                currentFontData.getGlyphData().logicalfontName = StandardFonts.expandName(currentTextState.getFontID());
            }

            //font change has now been dealt with, so clear the flag
            if (currentTextState.hasFontChanged()) {
                currentTextState.setFontChanged(false);
            }
        }

        switch (commandID) {

            case Cmd.BMC:
                //one level deeper into marked content
                parserOptions.setLayerLevel(parserOptions.getLayerLevel() + 1);

                //record this level in the layer-visibility collection while the layer is visible
                if (parserOptions.isLayerVisible()) {
                    parserOptions.getLayerVisibility().add(parserOptions.getLayerLevel());
                }

                if (contentHandler != null) {
                    contentHandler.BMC(parser.generateOpAsString(0, false));
                }

                break;

            case Cmd.BDC:

                final PdfObject BDCobj = BDC.execute(startCommand, dataPointer, parser.getStream(),
                        parser.generateOpAsString(0, false), gs, currentPdfFile, current, parserOptions);

                //NOTE(review): BDCobj is dereferenced without a null check - confirm BDC.execute never returns null
                //work around for unbalanced clip
                if (BDCobj.getClip() != null) {
                    BDCDepth = graphicsStates.getDepth();
                } else {
                    BDCDepth = -1;
                }

                //track setting and use in preference for text extraction
                textDecoder.setActualText(BDCobj.getTextStreamValue(PdfDictionary.ActualText));

                if (contentHandler != null) {
                    contentHandler.BDC(BDCobj);
                }

                break;

            case Cmd.BT:
                currentTextState.resetTm();
                break;

            case Cmd.EMC:
                //ActualText set by a BDC no longer applies
                textDecoder.setActualText(null);
                if (contentHandler != null) {
                    contentHandler.EMC();
                }

                //balance stack inside tagged commands
                if (parserOptions.getLayerLevel() == 1 && BDCDepth != -1 && BDCDepth != graphicsStates.getDepth()) {
                    graphicsStates.correctDepth(0, gs, current);
                }
                BDCDepth = -1;

                EMC.execute(current, gs, parserOptions);
                break;

            case Cmd.ET:
                current.writeCustom(DynamicVectorRenderer.RESET_COLORSPACE, null);

                //text was drawn in clip mode, so write a clip to the renderer
                if (gs.getTextRenderType() == GraphicsState.CLIPTEXT) {
                    current.drawClip(gs, null, false);
                }

                break;

            case Cmd.DP:
                //only read the marked-content object when someone is listening for it
                if (contentHandler != null) {

                    final MCObject obj = new MCObject(parser.generateOpAsString(0, false));
                    currentPdfFile.readObject(obj);

                    contentHandler.DP(obj);
                }
                break;

            case Cmd.Tf:
                currentTextState.TF(parser.parseFloat(0), (parser.generateOpAsString(1, true)));
                break;

            case Cmd.Tc:
                currentTextState.setCharacterSpacing(parser.parseFloat(0));
                break;

            case Cmd.TD:
                TD.execute(false, parser.parseFloat(1), parser.parseFloat(0), currentTextState);
                multipleTJs = false;
                break;

            case Cmd.Td:
                TD.execute(true, parser.parseFloat(1), parser.parseFloat(0), currentTextState);
                multipleTJs = false;
                break;

            case Cmd.Tj:

                if (currentTextState.getTfs() != 0) { //avoid zero size text
                    lastTextValue = textDecoder.TJ(currentTextState, currentFontData, parser.getStream(), startCommand, dataPointer, multipleTJs);
                }
                multipleTJs = true; //set back to false by TD/Td/Tm/T* (and by ' and " before they decode)

                break;

            case Cmd.TJ:
                //unlike Tj, there is no zero-size check here
                lastTextValue = textDecoder.TJ(currentTextState, currentFontData, parser.getStream(), startCommand, dataPointer, multipleTJs);
                multipleTJs = true; //set back to false by TD/Td/Tm/T* (and by ' and " before they decode)

                break;

            case Cmd.quote:
                //move down by the leading, then show the text
                TD.relativeMove(0, -currentTextState.getLeading(), currentTextState);
                multipleTJs = false;
                lastTextValue = textDecoder.TJ(currentTextState, currentFontData, parser.getStream(), startCommand, dataPointer, multipleTJs);
                multipleTJs = true; //set back to false by TD/Td/Tm/T* (and by ' and " before they decode)

                break;

            case Cmd.doubleQuote:
                final byte[] characterStream = parser.getStream();

                //operand 1 is the character spacing, operand 2 the word spacing
                currentTextState.setCharacterSpacing(parser.parseFloat(1));
                currentTextState.setWordSpacing(parser.parseFloat(2));

                TD.relativeMove(0, -currentTextState.getLeading(), currentTextState);

                multipleTJs = false;

                //we can have values which are not accounted for before stream so rollon so we ignore
                //NOTE(review): scan has no upper bound - a stream without '(', '<' or '[' after startCommand would run past the end of the array
                while (characterStream[startCommand] != '(' && characterStream[startCommand] != '<' && characterStream[startCommand] != '[') {
                    startCommand++;
                }

                lastTextValue = textDecoder.TJ(currentTextState, currentFontData, characterStream, startCommand, dataPointer, multipleTJs);
                multipleTJs = true; //set back to false by TD/Td/Tm/T* (and by ' and " before they decode)

                break;

            case Cmd.Tm:
                //set Tm matrix
                //operands are indexed from the last one, so index 5 is the first operand written in the stream
                currentTextState.Tm[0][0] = parser.parseFloat(5);
                currentTextState.Tm[0][1] = parser.parseFloat(4);
                currentTextState.Tm[0][2] = 0;
                currentTextState.Tm[1][0] = parser.parseFloat(3);
                currentTextState.Tm[1][1] = parser.parseFloat(2);
                currentTextState.Tm[1][2] = 0;
                currentTextState.Tm[2][0] = parser.parseFloat(1);
                currentTextState.Tm[2][1] = parser.parseFloat(0);
                currentTextState.Tm[2][2] = 1;

                //keep position in case we need
                currentTextState.setTMAtLineStart();
                multipleTJs = false;
                break;

            case Cmd.Tstar:
                //move down by the leading
                TD.relativeMove(0, -currentTextState.getLeading(), currentTextState);
                multipleTJs = false;
                break;

            case Cmd.Tr:
                final int value = TR.execute(parser.parseInt(), gs);
                //only written to the renderer when rendering the page and not rendering directly
                if (parserOptions.isRenderPage() && !parserOptions.renderDirectly()) {
                    current.drawTR(value);
                }
                break;

            case Cmd.Ts:
                currentTextState.setTextRise(parser.parseFloat(0));
                break;

            case Cmd.Tw:
                currentTextState.setWordSpacing(parser.parseFloat(0));
                break;

            case Cmd.Tz:
                //operand is divided by 100 before being stored
                currentTextState.setHorizontalScaling(parser.parseFloat(0) / 100);
                break;

            case Cmd.TL:
                currentTextState.setLeading(parser.parseFloat(0));
                break;
        }
        return dataPointer;
    }

    /**
     * Gives access to the object cache used by this decoder.
     *
     * @return the cache instance held by this decoder
     */
    public PdfObjectCache getObjectCache() {
        return this.cache;
    }

    /**
     * Returns the blend mode most recently stored in currentBlendMode.
     *
     * @return current blend mode value
     */
    public int getBlendMode() {
        return this.currentBlendMode;
    }

    /**
     * Advances the token counter by one and passes the new value to the renderer.
     */
    public void incrementTokenNumber() {
        tokenNumber += 1;
        current.setValue(DynamicVectorRenderer.TOKEN_NUMBER, tokenNumber);
    }

    /**
     * Builds an ExtGStateObject from raw bytes and resolves it.
     * <p>
     * Data starting with '<' is marked as an undecoded direct object; anything
     * else is marked as an undecoded reference. The object is then resolved
     * through an ObjectDecoder created on the file's object reader.
     *
     * @param currentPdfFile reader whose object reader is used to resolve the object
     * @param data           raw bytes of the ExtGState entry (must be non-null and non-empty)
     * @return the resolved ExtGState object
     */
    private static ExtGStateObject getExtStateObjectFromRefOrDirect(final PdfObjectReader currentPdfFile, final byte[] data) {

        //decode with an explicit charset so the result does not depend on the platform default;
        //ISO-8859-1 maps every byte to exactly one char, so no byte is dropped or replaced
        final String rawText = new String(data, java.nio.charset.StandardCharsets.ISO_8859_1);
        final ExtGStateObject obj = new ExtGStateObject(rawText);

        if (data[0] == '<') {
            obj.setStatus(PdfObject.UNDECODED_DIRECT);
        } else {
            obj.setStatus(PdfObject.UNDECODED_REF);
        }
        obj.setUnresolvedData(data, PdfDictionary.ExtGState);

        final ObjectDecoder objectDecoder = new ObjectDecoder(currentPdfFile.getObjectReader());
        objectDecoder.checkResolved(obj);

        return obj;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy