org.jpedal.parser.PdfStreamDecoder Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of OpenViewerFX Show documentation
Show all versions of OpenViewerFX Show documentation
Open Source (LGPL) JavaFX PDF Viewer
/*
* ===========================================
* Java Pdf Extraction Decoding Access Library
* ===========================================
*
* Project Info: http://www.idrsolutions.com
* Help section for developers at http://www.idrsolutions.com/support/
*
* (C) Copyright 1997-2017 IDRsolutions and Contributors.
*
* This file is part of JPedal/JPDF2HTML5
*
@LICENSE@
*
* ---------------
* PdfStreamDecoder.java
* ---------------
*/
package org.jpedal.parser;
import java.awt.Graphics2D;
import java.awt.Rectangle;
import java.awt.Shape;
import org.jpedal.PdfDecoderInt;
import org.jpedal.color.PdfPaint;
import org.jpedal.constants.PageInfo;
import org.jpedal.exception.PdfException;
import org.jpedal.external.*;
import org.jpedal.fonts.PdfFont;
import org.jpedal.fonts.StandardFonts;
import org.jpedal.fonts.glyph.JavaFXSupport;
import org.jpedal.fonts.glyph.T3Size;
import org.jpedal.images.SamplingFactory;
import org.jpedal.io.DefaultErrorTracker;
import org.jpedal.io.ObjectDecoder;
import org.jpedal.io.ObjectStore;
import org.jpedal.io.PdfObjectFactory;
import org.jpedal.io.PdfObjectReader;
import org.jpedal.io.StatusBar;
import org.jpedal.objects.*;
import org.jpedal.objects.layers.PdfLayerList;
import org.jpedal.objects.raw.ExtGStateObject;
import org.jpedal.objects.raw.MCObject;
import org.jpedal.objects.raw.PdfDictionary;
import org.jpedal.objects.raw.PdfObject;
import org.jpedal.objects.raw.XObject;
import org.jpedal.parser.color.*;
import org.jpedal.parser.gs.CM;
import org.jpedal.parser.gs.Q;
import org.jpedal.parser.image.DO;
import org.jpedal.parser.image.ID;
import org.jpedal.parser.image.ImageDecoder;
import org.jpedal.parser.shape.*;
import org.jpedal.parser.text.*;
import org.jpedal.render.DynamicVectorRenderer;
import org.jpedal.utils.LogWriter;
import org.jpedal.utils.repositories.Vector_Int;
import org.jpedal.utils.repositories.generic.Vector_Rectangle_Int;
/**
* Contains the code which 'parses' the commands in the stream and extracts the
* data (images and text). Users should not need to call it.
*/
public class PdfStreamDecoder extends BaseDecoder {
private static boolean showFXShadingMessage;
protected org.jpedal.objects.structuredtext.StructuredContentHandler contentHandler;
int formLevel;
private int BDCDepth = -1;
PdfObjectCache cache;
PdfPageData pageData;
ErrorTracker errorTracker;
PdfObjectReader currentPdfFile;
protected GraphicsState newGS;
protected byte[] pageStream;
PdfLayerList layers;
protected boolean getSamplingOnly;
private boolean isTTHintingRequired;
final Vector_Int textDirections = new Vector_Int();
final Vector_Rectangle_Int textAreas = new Vector_Rectangle_Int();
/**
* shows if t3 glyph uses internal colour or current colour
*/
public boolean ignoreColors;
/**
* images on page
*/
int imageCount;
String lastTextValue = "";
//trap for recursive loop of xform calling itself
int lastDataPointer = -1;
private T3Decoder t3Decoder;
/**
* flag to show if we REMOVE shapes
*/
private boolean removeRenderImages;
//last Trm in case of multiple Tj commands
private boolean multipleTJs;
/**
* flags to show we need colour data as well
*/
private boolean textColorExtracted;
/**
* flag to show text is being extracted
*/
private boolean textExtracted = true;
/**
* flag to show content is being rendered
*/
private boolean renderText;
private int tokenNumber;
/**
* list of images used for display
*/
String imagesInFile;
//set threshold - value indicates several possible values
public static final float currentThreshold = 0.595f;
protected ImageHandler customImageHandler;
private PdfFontFactory pdfFontFactory;
boolean isXMLExtraction;
/**
* internal development flag which should not be used
*/
//turn on debugging to see commands
public static final boolean showCommands = false;
// public static boolean showCommands = true;
/**
* interactive display
*/
private StatusBar statusBar;
/**
* store text data and can be passed out to other classes
*/
final PdfData pdfData = new PdfData();
/**
* store image data extracted from pdf
*/
final PdfImageData pdfImages = new PdfImageData();
/**
* used to debug
*/
protected static String indent = "";
/**
* show if possible error in stream data
*/
protected boolean isDataValid = true;
/**
* used to store font information from pdf and font functionality
*/
private PdfFont currentFontData;
protected ObjectStore objectStoreStreamRef;
String formName = "";
public static boolean useTextPrintingForNonEmbeddedFonts;
/**
* allows us to terminate file if looks like might crash JVM due to complexity
*/
private static int maxShapesAllowed = -1;
// Used to get the blendmode of an object in PDFObjectToImage
private int currentBlendMode = PdfDictionary.Normal;
static {
    SamplingFactory.setDownsampleMode(null);
    /*
     * we have PDFs which crash the JVM, so an optional limit on the number of
     * shape commands can be supplied via the org.jpedal.maxShapeCount system property.
     */
    final String maxShapes = System.getProperty("org.jpedal.maxShapeCount");
    if (maxShapes != null) {
        try {
            //tolerate stray whitespace around the number
            maxShapesAllowed = Integer.parseInt(maxShapes.trim());
        } catch (final NumberFormatException e) {
            //keep the original exception as the cause so its stack trace is not lost
            throw new RuntimeException("Your setting (" + maxShapes + ")for org.jpedal.maxShapeCount is not a valid number " + e, e);
        }
    }
}
/**
* create new StreamDecoder without a layer list
*
* @param currentPdfFile file reader used by this decoder and its font factory
*/
public PdfStreamDecoder(final PdfObjectReader currentPdfFile) {
init(currentPdfFile);
}
/**
* create new StreamDecoder to create display
*
* @param currentPdfFile file reader used by this decoder and its font factory
* @param layers layer list to use (left unset if null)
*/
public PdfStreamDecoder(final PdfObjectReader currentPdfFile, final PdfLayerList layers) {
if (layers != null) {
this.layers = layers;
}
init(currentPdfFile);
}
/**
 * shared setup used by both constructors - creates the default helper
 * objects and makes sure the standard font tables are loaded
 *
 * @param currentPdfFile file reader stored for later use
 */
private void init(final PdfObjectReader currentPdfFile) {
    this.currentPdfFile = currentPdfFile;

    //default state objects
    cache = new PdfObjectCache();
    gs = new GraphicsState();
    pageData = new PdfPageData();
    errorTracker = new DefaultErrorTracker();

    //font tables are loaded before the font factory is created
    StandardFonts.checkLoaded(StandardFonts.STD);
    StandardFonts.checkLoaded(StandardFonts.MAC);
    pdfFontFactory = new PdfFontFactory(currentPdfFile);
}
/**
* objects off the page, stitch into a stream and decode and put into our
* data object. Could be altered if you just want to read the stream
*
* @param pdfObject object whose Contents are read and decoded; if null the
* preset pageStream (when set) is decoded instead
* @return T3Size holding the max width/height recorded by the T3 decoder (left
* at the T3Size defaults if no T3 decoder was created), or null if an Error was
* trapped while decoding
* @throws PdfException if the page is being rendered but no
* DynamicVectorRenderer has been set up
*/
public T3Size decodePageContent(final PdfObject pdfObject) throws PdfException {
try {
//check switched off
parserOptions.imagesProcessedFully = true;
parserOptions.tooManyShapes = false;
//reset count
imageCount = 0;
parserOptions.setPdfLayerList(this.layers);
//reset count
imagesInFile = null; //also reset here as good point as syncs with font code
if (!parserOptions.renderDirectly() && statusBar != null) {
statusBar.percentageDone = 0;
}
//use the preset graphics state if one was supplied, otherwise start with a new one
if (newGS != null) {
gs = newGS;
} else {
gs = new GraphicsState(0, 0);
}
//save for later
if (parserOptions.isRenderPage()) {
/*
* check setup and throw exception if null
*/
if (current == null) {
throw new PdfException("DynamicVectorRenderer not setup PdfStreamDecoder setStore(...) should be called");
}
current.drawClip(gs, parserOptions.defaultClip, false);
final int pageNum = parserOptions.getPageNumber();
//Paint background here to ensure we allow for changed background color in extraction modes
current.writeCustom(DynamicVectorRenderer.PAINT_BACKGROUND, new Rectangle(pageData.getCropBoxX(pageNum), pageData.getCropBoxY(pageNum),
pageData.getCropBoxWidth(pageNum), pageData.getCropBoxHeight(pageNum)));
}
//get the binary data from the file
final byte[] b_data;
byte[][] pageContents = null;
if (pdfObject != null) {
pageContents = pdfObject.getKeyArray(PdfDictionary.Contents);
//NOTE(review): isDataValid is set directly from streamMayBeCorrupt() - confirm the sense is not inverted
isDataValid = pdfObject.streamMayBeCorrupt();
}
//object with no Contents array is read directly, otherwise use the preset
//pageStream if there is one or ask the reader to build the stream for the page
if (pdfObject != null && pageContents == null) {
b_data = currentPdfFile.readStream(pdfObject, true, true, false, false, false, pdfObject.getCacheName(currentPdfFile.getObjectReader()));
} else if (pageStream != null) {
b_data = pageStream;
} else {
b_data = currentPdfFile.getObjectReader().readPageIntoStream(pdfObject);
}
//trap for recursive loop of xform calling itself
lastDataPointer = -1;
//if page data found, turn it into a set of commands
//and decode the stream of commands
if (b_data != null && b_data.length > 0) {
decodeStreamIntoObjects(b_data, false);
}
//flush fonts
if (!parserOptions.isType3Font()) {
cache.resetFonts();
}
//copy out any values recorded by the T3 decoder and then discard it
final T3Size t3 = new T3Size();
if (t3Decoder != null) {
t3.x = t3Decoder.T3maxWidth;
t3.y = t3Decoder.T3maxHeight;
ignoreColors = t3Decoder.ignoreColors;
t3Decoder = null;
}
return t3;
} catch (final Error err) {
//Errors are logged and recorded as a page failure; only the "kochi" case is rethrown when the external flag is set
LogWriter.writeLog("Error " + err);
if (ExternalHandlers.throwMissingCIDError && err.getMessage() != null && err.getMessage().contains("kochi")) {
throw err;
}
errorTracker.addPageFailureMessage("Problem decoding page " + err);
}
return null;
}
/**
 * pass an object into the decoder. The key selects what the object is used
 * for and the object is cast to the type that key requires; keys which are
 * not recognised are ignored.
 *
 * @param key constant (from ValueTypes or Options) identifying the setting
 * @param obj value for the setting
 */
public void setObjectValue(final int key, final Object obj) {
    switch (key) {
        case ValueTypes.Name:
            parserOptions.setName((String) obj);
            break;

        case ValueTypes.PDFPageData:
            pageData = (PdfPageData) obj;
            //flag if colour info being extracted
            if (textColorExtracted) {
                pdfData.enableTextColorDataExtraction();
            }
            break;

        //pass in status bar object
        case ValueTypes.StatusBar:
            statusBar = (StatusBar) obj;
            break;

        case ValueTypes.PdfLayerList:
            layers = (PdfLayerList) obj;
            break;

        //used internally for structured content extraction
        case ValueTypes.MarkedContent:
            contentHandler = isHTML
                    ? new org.jpedal.objects.structuredtext.HTMLStructuredContentHandler(obj, current)
                    : new org.jpedal.objects.structuredtext.StructuredContentHandler(obj);
            parserOptions.setContentHandler(contentHandler);
            break;

        case Options.GlyphTracker:
            parserOptions.setCustomGlyphTracker((GlyphTracker) obj);
            break;

        case ValueTypes.ImageHandler:
            customImageHandler = (ImageHandler) obj;
            passImageHandlerToRenderer();
            break;

        //setup stream decoder to render directly to g2 (used by image extraction)
        case ValueTypes.DirectRendering:
            parserOptions.setRenderDirectly(true);
            if (obj != null) {
                parserOptions.defaultClip = ((Graphics2D) obj).getClip();
            }
            break;

        //should be called after constructor or other methods may not work
        case ValueTypes.ObjectStore:
            objectStoreStreamRef = (ObjectStore) obj;
            passImageHandlerToRenderer();
            break;

        case Options.ErrorTracker:
            errorTracker = (ErrorTracker) obj;
            break;

        case Options.ShapeTracker:
            parserOptions.setCustomShapeTracker((ShapeTracker) obj);
            break;
    }
}

/**
 * hand the custom image handler to the renderer if both have been set
 */
private void passImageHandlerToRenderer() {
    if (customImageHandler != null && current != null) {
        current.writeCustom(DynamicVectorRenderer.CUSTOM_IMAGE_HANDLER, customImageHandler);
    }
}
/**
* flag to show interrupted by user
*/
boolean isPrinting;
/**
 * NOT PART OF API tells software to generate glyph when first rendered not
 * when decoded. Should not need to be called in general usage
 *
 * @param key setting to change (only GenerateGlyphOnRender is acted on)
 * @param value new value for the setting
 */
public void setBooleanValue(final int key, final boolean value) {
    //any other key is silently ignored
    if (key == GenerateGlyphOnRender) {
        parserOptions.setGenerateGlyphOnRender(value);
    }
}
/**/
/**
 * used internally to allow for colored streams - applies the supplied
 * colours to the current graphics state and to its two colorspaces
 *
 * @param strokeCol colour to use for stroking
 * @param nonstrokeCol colour to use for non-stroking
 */
public void setDefaultColors(final PdfPaint strokeCol, final PdfPaint nonstrokeCol) {
    final GraphicsState state = gs;

    //colorspaces first
    state.strokeColorSpace.setColor(strokeCol);
    state.nonstrokeColorSpace.setColor(nonstrokeCol);

    //then the graphics state itself
    state.setStrokeColor(strokeCol);
    state.setNonstrokeColor(nonstrokeCol);
}
/**
 * return the data object which matches the key
 *
 * @param key constant identifying the value wanted
 * @return the matching object or null if the key is not recognised
 */
public Object getObjectValue(final int key) {
    final Object value;

    switch (key) {
        case ValueTypes.PDFData:
            if (DecoderOptions.embedWidthData) {
                pdfData.widthIsEmbedded();
            }
            value = pdfData;
            break;

        case ValueTypes.PDFImages:
            value = pdfImages;
            break;

        case ValueTypes.TextAreas:
            value = textAreas;
            break;

        case ValueTypes.TextDirections:
            value = textDirections;
            break;

        case ValueTypes.DynamicVectorRenderer:
            value = current;
            break;

        case PdfDictionary.Font:
            value = pdfFontFactory.getFontsInFile();
            break;

        case PdfDictionary.Image:
            value = imagesInFile;
            break;

        case DecodeStatus.NonEmbeddedCIDFonts:
            value = pdfFontFactory.getnonEmbeddedCIDFonts();
            break;

        case PageInfo.COLORSPACES:
            value = cache.iterator(PdfObjectCache.ColorspacesUsed);
            break;

        default:
            value = null;
            break;
    }

    return value;
}
/**
 * make sure the Resources object is resolved and read it into the object cache
 *
 * @param Resources Resources object to read
 * @param resetList if true the font factory's list of fonts in the file is
 * reset first (the flag is also passed on to the cache)
 * @throws PdfException
 */
public final void readResources(final PdfObject Resources, final boolean resetList) throws PdfException {

    //start with a clean font list if requested
    if (resetList) {
        pdfFontFactory.resetfontsInFile();
    }

    //resolve before handing to the cache
    currentPdfFile.checkResolved(Resources);

    cache.readResources(Resources, resetList, currentPdfFile.getObjectReader());
}
/**
 * decode the actual 'Postscript' stream into text and images by extracting
 * commands and decoding each.
 *
 * @param stream raw bytes of the content stream (must not be null)
 * @param returnText if true the last text value decoded is returned
 * @return null if the stream is empty, the last decoded text value if
 * returnText is set, otherwise an empty String
 */
public String decodeStreamIntoObjects(final byte[] stream, final boolean returnText) {
    if (stream.length == 0) {
        return null;
    }
    //start of Dictionary on Inline image
    int startInlineStream = 0;
    final CommandParser parser = new CommandParser(stream);
    final int streamSize = stream.length;
    int dataPointer = 0;
    int startCommand = 0;
    int shapeCommandCount = 0;
    PdfShape currentDrawShape = null;
    if (parserOptions.useJavaFX()) {
        final JavaFXSupport fxSupport = ExternalHandlers.getFXHandler();
        if (fxSupport != null) {
            currentDrawShape = fxSupport.getFXShape();
        }
    } else {
        currentDrawShape = new SwingShape();
    }
    //setup textDecoder
    final Tj textDecoder;
    if (parserOptions.hasContentHandler()) {
        textDecoder = new Tj(parserOptions, textAreas, textDirections, current, errorTracker);
    } else {
        textDecoder = new Tj(parserOptions, pdfData, isXMLExtraction, textAreas, textDirections, current, errorTracker);
        textDecoder.setReturnText(returnText);
    }
    textDecoder.setStreamType(streamType);
    if (statusBar != null && !parserOptions.renderDirectly()) {
        statusBar.percentageDone = 0;
        statusBar.resetStatus("stream");
    }
    /*
     * loop to read stream and decode
     */
    while (true) {
        //allow user to request exit and fail page
        if (errorTracker.checkForExitRequest(dataPointer, streamSize)) {
            break;
        }
        if (statusBar != null && !parserOptions.renderDirectly()) {
            //multiply as a long - 90 * dataPointer overflows an int once the pointer passes ~23.8 million bytes
            statusBar.percentageDone = (int) ((90L * dataPointer) / streamSize);
        }
        dataPointer = parser.getCommandValues(dataPointer, tokenNumber);
        final int commandID = parser.getCommandID();
        //use negative flag to show commands found
        if (dataPointer < 0) {
            dataPointer = -dataPointer;
            try {
                /*
                 * call method to handle commands
                 */
                final int commandType = Cmd.getCommandType(commandID);
                /*text commands first and all other
                 * commands if not found in first
                 **/
                switch (commandType) {
                    case Cmd.TEXT_COMMAND:
                        //EMC and BDC are always processed, other text commands only if the layer is visible
                        if ((commandID == Cmd.EMC || commandID == Cmd.BDC || parserOptions.isLayerVisible()) && !getSamplingOnly && (renderText || textExtracted)) {
                            dataPointer = processTextToken(textDecoder, parser, commandID, startCommand, dataPointer);
                        }
                        break;
                    case Cmd.SHAPE_COMMAND:
                        if (!getSamplingOnly) {
                            processShapeCommands(parser, currentDrawShape, commandID);
                            shapeCommandCount++;
                            if (maxShapesAllowed > 0 && shapeCommandCount > maxShapesAllowed) {
                                final String errMessage = "[PDF] Shapes on page exceed limit set by JVM flag org.jpedal.maxShapeCount - value " + maxShapesAllowed;
                                parserOptions.tooManyShapes = true;
                                throw new PdfException(errMessage);
                            }
                        }
                        break;
                    case Cmd.SHADING_COMMAND:
                        // Internal tests can disable images to speed up conversion
                        if (System.getProperty("testsDisableImages") != null) {
                            break;
                        }
                        if (!getSamplingOnly && parserOptions.isRenderPage()) {
                            if (parserOptions.useJavaFX) {
                                //message is only shown once
                                if (!showFXShadingMessage) {
                                    System.out.println("SH not implemented in JavaFX yet");
                                    showFXShadingMessage = true;
                                }
                            } else {
                                SH.execute(parser.generateOpAsString(0, true), cache, gs,
                                        isPrinting, parserOptions.getPageNumber(), currentPdfFile,
                                        pageData, current);
                            }
                        }
                        break;
                    case Cmd.COLOR_COMMAND:
                        if (!getSamplingOnly) {
                            processColor(parser, commandID);
                        }
                        break;
                    case Cmd.GS_COMMAND:
                        processGScommands(parser, commandID);
                        //may have changed so read back and reset
                        if (commandID == Cmd.cm) {
                            multipleTJs = false;
                        }
                        break;
                    case Cmd.IMAGE_COMMAND:
                        if (commandID == Cmd.BI) {
                            //just remember where the inline image dictionary starts
                            startInlineStream = dataPointer;
                        } else {
                            PdfObject XObject = null;
                            int subtype = 1;
                            if (commandID == Cmd.Do) {
                                final String name = parser.generateOpAsString(0, true);
                                final byte[] XObjectData = cache.getXObjects(name);
                                if (XObjectData != null) {
                                    XObject = PdfObjectFactory.getPDFObjectObjectFromRefOrDirect(new XObject("1 0 R"), currentPdfFile.getObjectReader(), XObjectData, PdfDictionary.XObject);
                                    subtype = XObject.getParameterConstant(PdfDictionary.Subtype);
                                }
                                if (subtype == PdfDictionary.Form) {
                                    if (formLevel > 100 && dataPointer == lastDataPointer) {
                                        //catch for odd files like 11jun/results.pdf
                                    } else {
                                        lastDataPointer = dataPointer;
                                        if (!parserOptions.isLayerVisible() || (layers != null && !layers.isVisible(XObject)) || XObject == null) {
                                            //form is not visible (or missing) so nothing to decode
                                        } else {
                                            XFormDecoder.processXForm(this, dataPointer, XObject, parserOptions.defaultClip, parser);
                                        }
                                        //NOTE: swapping cached XObjects back to references here to save memory
                                        //TURNED OUT TO BE A BAD IDEA - it breaks
                                        //[1719] P012-209_001 Projektplan-Projekt-Plan Nord.pdf
                                    }
                                }
                            }
                            if (subtype != PdfDictionary.Form) {
                                //ID handles inline images, DO handles image XObjects
                                final ImageDecoder imageDecoder;
                                if (commandID != Cmd.Do) {
                                    imageDecoder = new ID(imageCount, currentPdfFile, errorTracker, customImageHandler, objectStoreStreamRef, pdfImages, pageData, imagesInFile);
                                } else {
                                    imageDecoder = new DO(imageCount, currentPdfFile, errorTracker, customImageHandler, objectStoreStreamRef, pdfImages, pageData, imagesInFile);
                                }
                                imageDecoder.setRes(cache);
                                imageDecoder.setGS(gs);
                                imageDecoder.setSamplingOnly(getSamplingOnly);
                                imageDecoder.setStreamType(streamType);
                                imageDecoder.setMultiplyer(multiplyer);
                                imageDecoder.setRenderer(current);
                                parserOptions.isPrinting(isPrinting);
                                imageDecoder.setParams(parserOptions);
                                if (commandID == Cmd.Do) {
                                    //size test to remove odd lines in abacus file abacus/EP_Print_Post_Suisse_ID_120824.pdf
                                    if (XObject == null || !parserOptions.isLayerVisible() || (layers != null && !layers.isVisible(XObject)) || (gs.CTM != null && gs.CTM[1][1] == 0 && gs.CTM[1][0] != 0 && Math.abs(gs.CTM[1][0]) < 0.2)) {
                                        //ignore
                                    } else {
                                        String name = parser.generateOpAsString(0, true);
                                        //name is not unique if in form so we add form level to separate out
                                        if (formLevel > 0) {
                                            name = formName + '_' + formLevel + '_' + name;
                                        }
                                        dataPointer = imageDecoder.processImage(name, dataPointer, XObject);
                                    }
                                } else if (parserOptions.isLayerVisible()) {
                                    dataPointer = imageDecoder.processImage(dataPointer, startInlineStream, parser.getStream(), tokenNumber);
                                }
                                imageCount++;
                                imagesInFile = imageDecoder.getImagesInFile();
                            }
                        }
                        break;
                    case Cmd.T3_COMMAND:
                        if (!getSamplingOnly && (renderText || textExtracted)) {
                            //decoder is created when first needed
                            if (t3Decoder == null) {
                                t3Decoder = new T3Decoder();
                            }
                            //NOTE(review): setCommands is called twice with the same parser - second call looks redundant, confirm before removing
                            t3Decoder.setCommands(parser);
                            t3Decoder.setCommands(parser);
                            t3Decoder.processToken(commandID);
                        }
                        break;
                }
            } catch (final Exception e) {
                LogWriter.writeLog("[PDF] " + e + " Processing token >" + Cmd.getCommandAsString(commandID) + "<>" + parserOptions.getFileName() + " <" + parserOptions.getPageNumber());
                //jump to end of stream (which ends the loop) when isDataValid is not set
                if (!isDataValid) {
                    dataPointer = streamSize;
                }
            } catch (final OutOfMemoryError ee) {
                errorTracker.addPageFailureMessage("Memory error decoding token stream " + ee);
                LogWriter.writeLog("[MEMORY] Memory error - trying to recover");
            }
            //save for next command
            startCommand = dataPointer;
            //reset array of trailing values
            parser.reset();
            //increase pointer
            incrementTokenNumber();
        }
        //break at end
        if (streamSize <= dataPointer) {
            break;
        }
    }
    if (!parserOptions.renderDirectly() && statusBar != null) {
        statusBar.percentageDone = 100;
    }
    //pick up TextDecoder values
    isTTHintingRequired = textDecoder.isTTHintingRequired();
    if (returnText) {
        return lastTextValue;
    } else {
        return "";
    }
}
/**
 * handle the colour commands. Each command has an upper and lower case
 * version which differ only in the boolean flag passed to the handler
 * (true for the lower case version).
 *
 * @param parser parser holding the operands for the command
 * @param commandID id of the colour command
 */
void processColor(final CommandParser parser, final int commandID) {

    //every command except SCN/scn/SC/sc resets the colorspace on the renderer first
    final boolean isSCorSCN = commandID == Cmd.SCN || commandID == Cmd.scn
            || commandID == Cmd.SC || commandID == Cmd.sc;
    if (!isSCorSCN) {
        current.writeCustom(DynamicVectorRenderer.RESET_COLORSPACE, null);
    }

    switch (commandID) {
        case Cmd.cs:
        case Cmd.CS:
            CS.execute(commandID == Cmd.cs, parser.generateOpAsString(0, true), gs, cache, currentPdfFile, isPrinting);
            break;

        case Cmd.rg:
        case Cmd.RG:
            RG.execute(commandID == Cmd.rg, gs, parser, cache);
            break;

        //SC is handled by the SCN code
        case Cmd.SCN:
        case Cmd.SC:
            SCN.execute(false, gs, parser, cache);
            break;

        case Cmd.scn:
        case Cmd.sc:
            SCN.execute(true, gs, parser, cache);
            break;

        case Cmd.g:
        case Cmd.G:
            G.execute(commandID == Cmd.g, gs, parser, cache);
            break;

        case Cmd.k:
        case Cmd.K:
            K.execute(commandID == Cmd.k, gs, parser, cache);
            break;
    }
}
/**
 * handle the graphics state commands (cm, q, Q and gs)
 *
 * @param parser parser holding the operands for the command
 * @param commandID id of the graphics state command
 */
private void processGScommands(final CommandParser parser, final int commandID) {
    switch (commandID) {
        case Cmd.cm:
            CM.execute(gs, parser);
            break;

        //q passes true and Q passes false - result becomes the current graphics state
        case Cmd.q:
        case Cmd.Q:
            gs = Q.execute(gs, commandID == Cmd.q, graphicsStates, current);
            break;

        case Cmd.gs: {
            if (getSamplingOnly) {
                break;
            }
            //look up the named state in the cache and apply it
            final String stateName = parser.generateOpAsString(0, true);
            final byte[] rawState = cache.GraphicsStates.get(stateName);
            final PdfObject extGState = getExtStateObjectFromRefOrDirect(currentPdfFile, rawState);
            gs.setMode(extGState);

            //pass alpha and blend mode on to the renderer and remember the blend mode
            final int bm = gs.getBMValue();
            current.setGraphicsState(GraphicsState.FILL, gs.getAlpha(GraphicsState.FILL), bm);
            current.setGraphicsState(GraphicsState.STROKE, gs.getAlpha(GraphicsState.STROKE), bm);
            currentBlendMode = bm;
            break;
        }
    }
}
/**
 * handle the shape commands - path construction commands are passed to the
 * current shape, painting commands are executed (unless shapes are being
 * removed) and line settings are stored in the graphics state
 *
 * @param parser parser holding the operands for the command
 * @param currentDrawShape shape being built up by the path commands
 * @param commandID id of the shape command
 */
private void processShapeCommands(final CommandParser parser, final PdfShape currentDrawShape, final int commandID) {
    switch (commandID) {
        case Cmd.B:
        case Cmd.b:
        case Cmd.bstar:
        case Cmd.Bstar:
            if (!removeRenderImages) {
                final boolean isStar = commandID == Cmd.bstar || commandID == Cmd.Bstar;
                final boolean isLowerCase = commandID == Cmd.b || commandID == Cmd.bstar;
                reportShapeToTracker(commandID, B.execute(isStar, isLowerCase, gs, formLevel, currentDrawShape, current, parserOptions));
            }
            break;

        case Cmd.c: {
            //operand 0 is the last value before the command
            final float thirdX = parser.parseFloat(1);
            final float thirdY = parser.parseFloat(0);
            final float secondX = parser.parseFloat(3);
            final float secondY = parser.parseFloat(2);
            final float firstX = parser.parseFloat(5);
            final float firstY = parser.parseFloat(4);
            currentDrawShape.addBezierCurveC(firstX, firstY, secondX, secondY, thirdX, thirdY);
            break;
        }

        case Cmd.d:
            D.execute(parser, gs);
            break;

        case Cmd.F:
        case Cmd.f:
            if (!removeRenderImages) {
                F.execute(tokenNumber, false, formLevel, currentDrawShape, gs, cache, currentPdfFile, current, parserOptions, multiplyer);
            }
            break;

        case Cmd.Fstar:
        case Cmd.fstar:
            if (!removeRenderImages) {
                F.execute(tokenNumber, true, formLevel, currentDrawShape, gs, cache, currentPdfFile, current, parserOptions, multiplyer);
            }
            break;

        case Cmd.h:
            currentDrawShape.closeShape();
            break;

        //Cmd.i is not handled

        case Cmd.J:
        case Cmd.j:
            J.execute(commandID == Cmd.j, parser.parseInt(), gs);
            break;

        case Cmd.l:
            currentDrawShape.lineTo(parser.parseFloat(1), parser.parseFloat(0));
            break;

        case Cmd.M:
            gs.setMitreLimit((int) (parser.parseFloat(0)));
            break;

        case Cmd.m:
            currentDrawShape.setClip(false);
            currentDrawShape.moveTo(parser.parseFloat(1), parser.parseFloat(0));
            break;

        case Cmd.n:
            N.execute(currentDrawShape, gs, formLevel, parserOptions.defaultClip, parserOptions, current, pageData);
            break;

        case Cmd.re:
            currentDrawShape.appendRectangle(parser.parseFloat(3), parser.parseFloat(2), parser.parseFloat(1), parser.parseFloat(0));
            break;

        case Cmd.S:
        case Cmd.s:
            if (!removeRenderImages) {
                reportShapeToTracker(commandID, S.execute(commandID == Cmd.s, gs, currentDrawShape, current, parserOptions));
            }
            break;

        case Cmd.v:
            currentDrawShape.addBezierCurveV(parser.parseFloat(3), parser.parseFloat(2), parser.parseFloat(1), parser.parseFloat(0));
            break;

        case Cmd.w:
            gs.setLineWidth(parser.parseFloat(0));
            break;

        //set Winding rule and flag shape as a clip
        case Cmd.Wstar:
            currentDrawShape.setEVENODDWindingRule();
            currentDrawShape.setClip(true);
            break;

        case Cmd.W:
            currentDrawShape.setNONZEROWindingRule();
            currentDrawShape.setClip(true);
            break;

        case Cmd.y:
            currentDrawShape.addBezierCurveY(parser.parseFloat(3), parser.parseFloat(2), parser.parseFloat(1), parser.parseFloat(0));
            break;
    }
}

/**
 * track shape for user if required - does nothing if there is no shape or
 * no custom shape tracker has been set
 */
private void reportShapeToTracker(final int commandID, final Shape shape) {
    if (shape == null) {
        return;
    }
    final ShapeTracker tracker = parserOptions.getCustomShapeTraker();
    if (tracker != null) {
        tracker.addShape(tokenNumber, commandID, shape, gs.nonstrokeColorSpace.getColor(), gs.strokeColorSpace.getColor());
    }
}
/**
 * return boolean flag for the appropriate key
 *
 * @param key constant (from ValueTypes or DecodeStatus) identifying the flag
 * @return current value of the flag
 * @throws RuntimeException if the key is not recognised
 */
public boolean getBooleanValue(final int key) {
    switch (key) {
        case ValueTypes.EmbeddedFonts:
            return pdfFontFactory.hasEmbeddedFonts();

        case ValueTypes.StructuredContent:
            //no handler means no structured content
            return contentHandler != null && contentHandler.hasContent();

        case DecodeStatus.PageDecodingSuccessful:
            return errorTracker.ispageSuccessful();

        case DecodeStatus.NonEmbeddedCIDFonts:
            return pdfFontFactory.hasNonEmbeddedCIDFonts();

        case DecodeStatus.ImagesProcessed:
            return parserOptions.imagesProcessedFully;

        case DecodeStatus.TooManyShapes:
            return parserOptions.tooManyShapes;

        case DecodeStatus.YCCKImages:
            return parserOptions.hasYCCKimages;

        case DecodeStatus.TTHintingRequired:
            return isTTHintingRequired;

        default:
            throw new RuntimeException("Unknown value " + key);
    }
}
/**
 * release the text data held by this decoder
 */
public void dispose() {
    if (pdfData == null) {
        return;
    }
    pdfData.dispose();
}
/**
 * set an int value in the parser options - keys other than PageNum and
 * TextPrint are ignored
 *
 * @param key constant (from ValueTypes) identifying the setting
 * @param value new value for the setting
 */
public void setIntValue(final int key, final int value) {
    if (key == ValueTypes.PageNum) {
        parserOptions.setPageNumber(value);
    } else if (key == ValueTypes.TextPrint) {
        /*
         * tells program to try and use Java's font printing if possible
         * as work around for issue with PCL printing
         */
        parserOptions.setTextPrint(value);
    }
}
/**
* set the XML extraction flag which is passed to the text decoder when a
* stream is decoded without a content handler
*
* @param isXMLExtraction new value for the flag
*/
public void setXMLExtraction(final boolean isXMLExtraction) {
this.isXMLExtraction = isXMLExtraction;
}
/**
 * pass the decoding settings to the parser options and work out the local
 * flags for text rendering, text extraction and shape removal
 *
 * @param isPageContent passed on to the parser options
 * @param renderPage true if page is being rendered - the render flags are only set if this is true
 * @param renderMode bit flags using the PdfDecoderInt render constants
 * @param extractionMode bit flags using the PdfDecoderInt extraction constants
 * @param isPrinting passed on to the parser options
 * @param useJavaFX passed on to the parser options
 */
public void setParameters(final boolean isPageContent, final boolean renderPage, final int renderMode, final int extractionMode, final boolean isPrinting, final boolean useJavaFX) {
    //NOTE(review): isPrinting here is only passed on - the isPrinting field of this class is not assigned, confirm that is intended
    parserOptions.init(isPageContent, renderPage, renderMode, extractionMode, isPrinting, useJavaFX);

    //extraction flags
    final int textFlag = PdfDecoderInt.TEXT;
    final int textColorFlag = PdfDecoderInt.TEXTCOLOR;
    textExtracted = (extractionMode & textFlag) == textFlag;
    textColorExtracted = (extractionMode & textColorFlag) == textColorFlag;

    //render flags can only be set when the page is rendered
    if (renderPage) {
        final int renderTextFlag = PdfDecoderInt.RENDERTEXT;
        final int removeShapesFlag = PdfDecoderInt.REMOVE_RENDERSHAPES;
        renderText = (renderMode & renderTextFlag) == renderTextFlag;
        removeRenderImages = (renderMode & removeShapesFlag) == removeShapesFlag;
    } else {
        renderText = false;
        removeRenderImages = false;
    }
}
/**
* set the form nesting level, which is used when decoding forms and to make
* image names inside forms unique
*
* @param value new form level
*/
public void setFormLevel(final int value) {
formLevel = value;
}
/**
* process each token and add to text or decode if not known command, place
* in array (may be operand which is later used by command)
*/
private int processTextToken(final Tj textDecoder, final CommandParser parser, final int commandID, int startCommand, final int dataPointer) {
textDecoder.setGS(gs);
final TextState currentTextState = gs.getTextState();
if (commandID == Cmd.BT && parserOptions.isRenderPage()) {
//save for later and set TR
current.drawClip(gs, parserOptions.defaultClip, true);
current.drawTR(GraphicsState.FILL);
}
if (commandID == Cmd.Tj || commandID == Cmd.TJ || commandID == Cmd.quote || commandID == Cmd.doubleQuote) {
if (currentTextState.hasFontChanged() && currentTextState.getTfs() != 0) { //avoid text which does not appear as zero size
//switch to correct font
final String fontID = currentTextState.getFontID();
final PdfFont restoredFont = FontResolver.resolveFont(gs, this, fontID, pdfFontFactory, cache);
if (restoredFont != null) {
currentFontData = restoredFont;
}
}
if (currentFontData == null) {
currentFontData = new PdfFont(currentPdfFile);
//use name for poss mappings (ie Helv)
currentFontData.getGlyphData().logicalfontName = StandardFonts.expandName(currentTextState.getFontID());
}
if (currentTextState.hasFontChanged()) {
currentTextState.setFontChanged(false);
}
}
switch (commandID) {
case Cmd.BMC:
parserOptions.setLayerLevel(parserOptions.getLayerLevel() + 1);
//flag so we can next values
if (parserOptions.isLayerVisible()) {
parserOptions.getLayerVisibility().add(parserOptions.getLayerLevel());
}
if (contentHandler != null) {
contentHandler.BMC(parser.generateOpAsString(0, false));
}
break;
case Cmd.BDC:
final PdfObject BDCobj = BDC.execute(startCommand, dataPointer, parser.getStream(),
parser.generateOpAsString(0, false), gs, currentPdfFile, current, parserOptions);
//work around for unbalanced clip
if (BDCobj.getClip() != null) {
BDCDepth = graphicsStates.getDepth();
} else {
BDCDepth = -1;
}
//track setting and use in preference for text extraction
textDecoder.setActualText(BDCobj.getTextStreamValue(PdfDictionary.ActualText));
if (contentHandler != null) {
contentHandler.BDC(BDCobj);
}
break;
case Cmd.BT:
currentTextState.resetTm();
break;
case Cmd.EMC:
textDecoder.setActualText(null);
if (contentHandler != null) {
contentHandler.EMC();
}
//balance stack inside tagged commands
if (parserOptions.getLayerLevel() == 1 && BDCDepth != -1 && BDCDepth != graphicsStates.getDepth()) {
graphicsStates.correctDepth(0, gs, current);
}
BDCDepth = -1;
EMC.execute(current, gs, parserOptions);
break;
case Cmd.ET:
current.writeCustom(DynamicVectorRenderer.RESET_COLORSPACE, null);
if (gs.getTextRenderType() == GraphicsState.CLIPTEXT) {
current.drawClip(gs, null, false);
}
break;
case Cmd.DP:
if (contentHandler != null) {
final MCObject obj = new MCObject(parser.generateOpAsString(0, false));
currentPdfFile.readObject(obj);
contentHandler.DP(obj);
}
break;
case Cmd.Tf:
currentTextState.TF(parser.parseFloat(0), (parser.generateOpAsString(1, true)));
break;
case Cmd.Tc:
currentTextState.setCharacterSpacing(parser.parseFloat(0));
break;
case Cmd.TD:
TD.execute(false, parser.parseFloat(1), parser.parseFloat(0), currentTextState);
multipleTJs = false;
break;
case Cmd.Td:
TD.execute(true, parser.parseFloat(1), parser.parseFloat(0), currentTextState);
multipleTJs = false;
break;
case Cmd.Tj:
if (currentTextState.getTfs() != 0) { //avoid zero size text
lastTextValue = textDecoder.TJ(currentTextState, currentFontData, parser.getStream(), startCommand, dataPointer, multipleTJs);
}
multipleTJs = true; //flag will be reset by Td/Tj/T* if move takes place.
break;
case Cmd.TJ:
lastTextValue = textDecoder.TJ(currentTextState, currentFontData, parser.getStream(), startCommand, dataPointer, multipleTJs);
multipleTJs = true; //flag will be reset by Td/Tj/T* if move takes place.
break;
case Cmd.quote:
TD.relativeMove(0, -currentTextState.getLeading(), currentTextState);
multipleTJs = false;
lastTextValue = textDecoder.TJ(currentTextState, currentFontData, parser.getStream(), startCommand, dataPointer, multipleTJs);
multipleTJs = true; //flag will be reset by Td/Tj/T* if move takes place.
break;
case Cmd.doubleQuote:
final byte[] characterStream = parser.getStream();
currentTextState.setCharacterSpacing(parser.parseFloat(1));
currentTextState.setWordSpacing(parser.parseFloat(2));
TD.relativeMove(0, -currentTextState.getLeading(), currentTextState);
multipleTJs = false;
//there can be values before the text data which are not accounted for, so roll on past them until the string or array starts
while (characterStream[startCommand] != '(' && characterStream[startCommand] != '<' && characterStream[startCommand] != '[') {
startCommand++;
}
lastTextValue = textDecoder.TJ(currentTextState, currentFontData, characterStream, startCommand, dataPointer, multipleTJs);
multipleTJs = true; //flag will be reset by Td/Tj/T* if move takes place.
break;
case Cmd.Tm:
//set Tm matrix
currentTextState.Tm[0][0] = parser.parseFloat(5);
currentTextState.Tm[0][1] = parser.parseFloat(4);
currentTextState.Tm[0][2] = 0;
currentTextState.Tm[1][0] = parser.parseFloat(3);
currentTextState.Tm[1][1] = parser.parseFloat(2);
currentTextState.Tm[1][2] = 0;
currentTextState.Tm[2][0] = parser.parseFloat(1);
currentTextState.Tm[2][1] = parser.parseFloat(0);
currentTextState.Tm[2][2] = 1;
//keep position in case we need
currentTextState.setTMAtLineStart();
multipleTJs = false;
break;
case Cmd.Tstar:
TD.relativeMove(0, -currentTextState.getLeading(), currentTextState);
multipleTJs = false;
break;
case Cmd.Tr:
final int value = TR.execute(parser.parseInt(), gs);
if (parserOptions.isRenderPage() && !parserOptions.renderDirectly()) {
current.drawTR(value);
}
break;
case Cmd.Ts:
currentTextState.setTextRise(parser.parseFloat(0));
break;
case Cmd.Tw:
currentTextState.setWordSpacing(parser.parseFloat(0));
break;
case Cmd.Tz:
currentTextState.setHorizontalScaling(parser.parseFloat(0) / 100);
break;
case Cmd.TL:
currentTextState.setLeading(parser.parseFloat(0));
break;
}
return dataPointer;
}
/**
 * Gives callers access to the object cache held by this decoder.
 *
 * @return the current value of the {@code cache} field (the same instance, not a copy)
 */
public PdfObjectCache getObjectCache() {
    return this.cache;
}
/**
* Accepts a BBox but does nothing with it: the body is empty and the
* argument is ignored (BBox is no longer used by this class).
* NOTE(review): presumably retained only so existing callers still compile - confirm.
*
* @param BBox ignored
*/
public void setBBox(final float[] BBox) {
// BBox no longer used
}
/**
 * Reports the blend mode currently recorded by this decoder.
 *
 * @return the current value of the {@code currentBlendMode} field
 */
public int getBlendMode() {
    return this.currentBlendMode;
}
/**
 * Advances the token counter by one and passes the new count to
 * {@code current} under the {@code DynamicVectorRenderer.TOKEN_NUMBER} key.
 */
public void incrementTokenNumber() {
    // pre-increment so that the already-advanced count is the value handed over
    current.setValue(DynamicVectorRenderer.TOKEN_NUMBER, ++tokenNumber);
}
/**
 * Builds an {@link ExtGStateObject} from raw bytes and asks an
 * {@link ObjectDecoder} to resolve it.
 * <p>
 * When the first byte is {@code '<'} the object is flagged
 * {@code PdfObject.UNDECODED_DIRECT}; any other first byte flags it
 * {@code PdfObject.UNDECODED_REF}.
 *
 * @param currentPdfFile reader whose object reader is handed to the {@link ObjectDecoder}
 * @param data raw bytes of the value; must be non-null and non-empty because the first byte is inspected
 * @return the object after {@code checkResolved} has been called on it
 */
private static ExtGStateObject getExtStateObjectFromRefOrDirect(final PdfObjectReader currentPdfFile, final byte[] data) {
    // Decode with a fixed, lossless single-byte charset instead of the platform default,
    // so the string passed to the constructor is the same on every JVM/locale.
    final String rawValue = new String(data, java.nio.charset.StandardCharsets.ISO_8859_1);
    final ExtGStateObject obj = new ExtGStateObject(rawValue);

    // a leading '<' selects the DIRECT status, anything else the REF status
    obj.setStatus(data[0] == '<' ? PdfObject.UNDECODED_DIRECT : PdfObject.UNDECODED_REF);
    obj.setUnresolvedData(data, PdfDictionary.ExtGState);

    final ObjectDecoder objectDecoder = new ObjectDecoder(currentPdfFile.getObjectReader());
    objectDecoder.checkResolved(obj);
    return obj;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy