![JAR search and dependency download from the Maven repository](/logo.png)
org.jpedal.io.PdfFileReader Maven / Gradle / Ivy
/*
* ===========================================
* Java Pdf Extraction Decoding Access Library
* ===========================================
*
* Project Info: http://www.idrsolutions.com
* Help section for developers at http://www.idrsolutions.com/support/
*
* (C) Copyright 1997-2017 IDRsolutions and Contributors.
*
* This file is part of JPedal/JPDF2HTML5
*
@LICENSE@
*
* ---------------
* PdfFileReader.java
* ---------------
*/
package org.jpedal.io;
import java.io.*;
import java.security.PrivateKey;
import java.security.cert.Certificate;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import org.jpedal.constants.PDFflags;
import org.jpedal.exception.PdfException;
import org.jpedal.exception.PdfSecurityException;
import org.jpedal.io.security.CryptoAES;
import org.jpedal.io.security.DecryptionFactory;
import org.jpedal.io.types.CompressedObjects;
import org.jpedal.io.types.ObjectReader;
import org.jpedal.io.types.Offsets;
import org.jpedal.io.types.RefTable;
import org.jpedal.objects.raw.*;
import org.jpedal.utils.LogWriter;
/**
* provides access to the file using Random access class to
* read bytes and strings from a pdf file. Pdf file is a mix of
* character and binary data streams
*/
public class PdfFileReader {
ObjectReader objectReader;
private PrivateKey key;
private Certificate certificate;
private LinearizedHintTable linHintTable;
/**
* used to cache last compressed object
*/
private byte[] lastCompressedStream;
/**
* used to cache last compressed object
*/
private Map lastOffsetStart;
private Map lastOffsetEnd;
private PdfObject compressedObj;
/**
* used to cache last compressed object
*/
private int lastFirst = -1, lastCompressedID = -1;
private RefTable refTable;
public PdfObject getInfoObject() {
return refTable.getInfoObject();
}
/**
* set size over which objects kept on disk
*/
public void setCacheSize(final int miniumumCacheSize) {
objectReader.setCacheSize(miniumumCacheSize);
}
PdfObject encyptionObj;
//private boolean isFDF=false;
private DecryptionFactory decryption;
/**
* encryption password
*/
private byte[] encryptionPassword;
/**
* file access
*/
private RandomAccessBuffer pdf_datafile;
// private final static byte[] endObj = { 32, 111, 98, 106 }; //pattern endobj
/**
* location from the reference table of each
* object in the file
*/
private Offsets offset = new Offsets(2000);
/**
* should never be final
*/
public static int alwaysCacheInMemory = 16384;
private long eof;
/**
* length of each object
*/
private int[] ObjLengthTable;
/**
* return pdf data
*/
public byte[] getBuffer() {
return pdf_datafile.getPdfBuffer();
}
public void init(final RandomAccessBuffer pdf_datafile) {
this.pdf_datafile = pdf_datafile;
try {
eof = pdf_datafile.length();
} catch (final IOException e) {
LogWriter.writeLog("Exception: " + e.getMessage());
}
objectReader = new ObjectReader(pdf_datafile, eof, this);
refTable = new RefTable(pdf_datafile, eof, offset);
}
/**
* read an object in the pdf into a Object which can be an indirect or an object
*/
public final void readObject(final PdfObject pdfObject) {
if (pdfObject.isDataExternal() && linHintTable != null) {
readExternalObject(pdfObject);
} else {
final String objectRef = pdfObject.getObjectRefAsString();
final int id = pdfObject.getObjectRefID();
final boolean debug = false;
if (debug) {
System.err.println("reading objectRef=" + objectRef + "< isCompressed=" + offset.isCompressed(id));
}
final boolean isCompressed = offset.isCompressed(id);
pdfObject.setCompressedStream(isCompressed);
//any stream
final byte[] raw;
/*read raw object data*/
if (isCompressed) {
raw = readCompressedObject(pdfObject);
} else {
movePointer(offset.elementAt(id));
if (objectRef.charAt(0) == '<') {
raw = objectReader.readObjectData(-1, pdfObject);
} else {
if (ObjLengthTable == null || offset.isRefTableInvalid()) { //isEncryptionObject
//allow for bum object
if (getPointer() == 0) {
raw = new byte[0];
} else {
raw = objectReader.readObjectData(-1, pdfObject);
}
} else if (id > ObjLengthTable.length || ObjLengthTable[id] == 0) {
LogWriter.writeLog(objectRef + " cannot have offset 0");
raw = new byte[0];
} else {
raw = objectReader.readObjectData(ObjLengthTable[id], pdfObject);
}
}
}
if (raw.length > 1) {
final ObjectDecoder objDecoder = new ObjectDecoder(this);
objDecoder.readDictionaryAsObject(pdfObject, 0, raw);
}
}
}
private void readExternalObject(final PdfObject pdfObject) {
final int ref = pdfObject.getObjectRefID();
final int generation = pdfObject.getObjectRefGeneration();
final byte[] pageData = readObjectAsByteArray(pdfObject, isCompressed(ref, generation), ref, generation);
pdfObject.setStatus(PdfObject.UNDECODED_DIRECT);
pdfObject.setUnresolvedData(pageData, PdfDictionary.Page);
ObjectDecoder.resolveFully(pdfObject, this);
}
private byte[] readCompressedObject(final PdfObject pdfObject) {
byte[] raw;
final int objectID = pdfObject.getObjectRefID();
final int compressedID = offset.elementAt(objectID);
String startID = null;
int First = lastFirst;
boolean isCached = true; //assume cached
//see if we already have values
byte[] compressedStream = lastCompressedStream;
Map offsetStart = lastOffsetStart;
Map offsetEnd = lastOffsetEnd;
PdfObject Extends = null;
if (lastOffsetStart != null && compressedID == lastCompressedID) {
startID = lastOffsetStart.get(String.valueOf(objectID));
}
//read 1 or more streams
while (startID == null) {
if (Extends != null) {
compressedObj = Extends;
} else if (compressedID != lastCompressedID) {
isCached = false;
movePointer(offset.elementAt(compressedID));
raw = objectReader.readObjectData(ObjLengthTable[compressedID], null);
compressedObj = new CompressedObject(compressedID, 0);
final ObjectDecoder objDecoder = new ObjectDecoder(this);
objDecoder.readDictionaryAsObject(compressedObj, 0, raw);
}
/*get offsets table see if in this stream*/
offsetStart = new HashMap();
offsetEnd = new HashMap();
First = compressedObj.getInt(PdfDictionary.First);
compressedStream = compressedObj.getDecodedStream();
CompressedObjects.extractCompressedObjectOffset(offsetStart, offsetEnd, First, compressedStream, compressedID, offset);
startID = offsetStart.get(String.valueOf(objectID));
Extends = compressedObj.getDictionary(PdfDictionary.Extends);
if (Extends == null) {
break;
}
}
if (!isCached) {
lastCompressedStream = compressedStream;
lastCompressedID = compressedID;
lastOffsetStart = offsetStart;
lastOffsetEnd = offsetEnd;
lastFirst = First;
}
/*put bytes in stream*/
final int start = First + Integer.parseInt(startID);
int end = compressedStream.length;
final String endID = offsetEnd.get(String.valueOf(objectID));
if (endID != null) {
end = First + Integer.parseInt(endID);
}
final int streamLength = end - start;
raw = new byte[streamLength];
System.arraycopy(compressedStream, start, raw, 0, streamLength);
pdfObject.setInCompressedStream(true);
return raw;
}
/**
* read a stream
*/
public final byte[] readStream(final PdfObject pdfObject, final boolean cacheValue,
final boolean decompress, final boolean keepRaw, final boolean isMetaData,
final boolean isCompressedStream, final String cacheName) {
final boolean debugStream = false;
boolean isCachedOnDisk = pdfObject.isCached();
byte[] data = null;
if (!isCachedOnDisk) {
data = pdfObject.getDecodedStream();
}
//BufferedOutputStream streamCache=null;
byte[] stream;
//decompress first time
if (data == null) {
stream = pdfObject.stream;
if (isCachedOnDisk) {
//decrypt the stream
try {
if (decryption != null && !isCompressedStream && (decryption.getBooleanValue(PDFflags.IS_METADATA_ENCRYPTED) || !isMetaData)) {
decryption.decrypt(null, pdfObject.getObjectRefAsString(), false, cacheName, false, false);
}
} catch (final Exception e) {
stream = null;
LogWriter.writeLog("Exception " + e);
}
}
if (stream != null) { /*decode and save stream*/
//decrypt the stream
try {
if (decryption != null && !isCompressedStream && (decryption.getBooleanValue(PDFflags.IS_METADATA_ENCRYPTED) || !isMetaData)) { // && pdfObject.getObjectType()!=PdfDictionary.ColorSpace){
stream = decryption.decrypt(stream, pdfObject.getObjectRefAsString(), false, null, false, false);
}
} catch (final PdfSecurityException e) {
stream = null;
LogWriter.writeLog("Exception " + e + " with " + pdfObject.getObjectRefAsString());
}
}
if (keepRaw) {
pdfObject.stream = null;
}
int length = 1;
if (stream != null || isCachedOnDisk) {
//values for CCITTDecode
int height = 1, width = 1;
final int newH = pdfObject.getInt(PdfDictionary.Height);
if (newH != -1) {
height = newH;
}
final int newW = pdfObject.getInt(PdfDictionary.Width);
if (newW != -1) {
width = newW;
}
final int newLength = pdfObject.getInt(PdfDictionary.Length);
if (newLength != -1) {
length = newLength;
}
/*allow for no width or length*/
if (height * width == 1) {
width = length;
}
final PdfArrayIterator filters = pdfObject.getMixedArray(PdfDictionary.Filter);
//check not handled elsewhere
int firstValue = PdfDictionary.Unknown;
if (filters != null && filters.hasMoreTokens()) {
firstValue = filters.getNextValueAsConstant(false);
}
if (debugStream) {
System.out.println("First filter=" + firstValue);
}
if (filters != null && firstValue != PdfDictionary.Unknown && firstValue != PdfFilteredReader.JPXDecode &&
firstValue != PdfFilteredReader.DCTDecode) {
if (debugStream) {
System.out.println("Decoding stream " + Arrays.toString(stream) + ' ' + pdfObject.isCached() + ' ' + pdfObject.getObjectRefAsString());
}
try {
final PdfFilteredReader filter = new PdfFilteredReader();
stream = filter.decodeFilters(ObjectUtils.setupDecodeParms(pdfObject, this), stream, filters, width, height, cacheName);
if (cacheName != null && encryptionPassword != null) {
final File f = new File(cacheName);
final FileInputStream fis = new FileInputStream(f);
byte[] temp = new byte[(int) f.length()];
fis.read(temp);
final CryptoAES aes = new CryptoAES();
temp = aes.encrypt(encryptionPassword, temp);
final FileOutputStream fos = new FileOutputStream(f);
fos.write(temp);
fos.close();
}
//flag if any error
pdfObject.setStreamMayBeCorrupt(filter.hasError());
} catch (final Exception e) {
LogWriter.writeLog("[PDF] Problem " + e + " decompressing stream ");
stream = null;
isCachedOnDisk = false; //make sure we return null, and not some bum values
}
//stop spurious match down below in caching code
length = 1;
} else if (stream != null && length != -1 && length < stream.length) {
/*make sure length correct*/
//if(stream.length!=length){
if (stream.length != length && length > 0) { //<-- last item breaks jbig??
final byte[] newStream = new byte[length];
System.arraycopy(stream, 0, newStream, 0, length);
stream = newStream;
} else if (stream.length == 1 && length == 0) {
stream = new byte[0];
}
}
}
if (stream != null && cacheValue) {
pdfObject.setDecodedStream(stream);
}
if (decompress && isCachedOnDisk) {
final int streamLength = (int) new File(cacheName).length();
byte[] bytes = new byte[streamLength];
try {
new BufferedInputStream(new FileInputStream(cacheName)).read(bytes);
if (encryptionPassword != null) {
final CryptoAES aes = new CryptoAES();
bytes = aes.decrypt(encryptionPassword, bytes);
}
} catch (final Exception e) {
LogWriter.writeLog("Exception: " + e.getMessage());
}
/*resize if length supplied*/
if (length != 1 && length < streamLength) {
final byte[] newStream = new byte[length];
System.arraycopy(bytes, 0, newStream, 0, length);
bytes = newStream;
}
return bytes;
}
} else {
stream = data;
}
if (stream == null) {
return null;
}
//make a a DEEP copy so we cant alter
final int len = stream.length;
final byte[] copy = new byte[len];
System.arraycopy(stream, 0, copy, 0, len);
return copy;
}
/**
* give user access to internal flags such as user permissions
*/
@SuppressWarnings("UnusedDeclaration")
public int getPDFflag(final Integer flag) {
if (decryption == null) {
return -1;
} else {
return decryption.getPDFflag(flag);
}
}
public void spoolStreamDataToDisk(final File tmpFile, long start, final int size) throws Exception {
movePointer(start);
boolean hasValues = false;
//final boolean streamFound=false;
boolean startStreamFound = false;
// Create output file
final BufferedOutputStream array = new BufferedOutputStream(new FileOutputStream(tmpFile));
int bufSize = -1;
int startStreamCount = 0;
int realPos = 0;
//final int streamCount=0;
final int XXX = 2 * 1024 * 1024;
final boolean debug = false;
if (debug) {
System.out.println("=============================");
}
if (bufSize < 1) {
bufSize = 128;
}
//array for data
int ptr = 0, maxPtr = bufSize;
byte[] readData = new byte[maxPtr];
byte[] buffer = null;
//final boolean inStream=false;
long pointer;
/*read the object or block*/
try {
byte currentByte;
int i = bufSize - 1, offset = -bufSize;
while (true) {
i++;
if (i == bufSize) { //read the next block
pointer = getPointer();
if (start == -1) {
start = pointer;
}
/*adjust buffer if less than bytes left in file*/
if (pointer + bufSize > eof) {
bufSize = (int) (eof - pointer);
}
bufSize += 6;
buffer = new byte[bufSize];
pdf_datafile.read(buffer); //get bytes into buffer
offset += i;
i = 0;
}
/*write out and look for endobj at end*/
//lastByte=currentByte;
currentByte = buffer[i];
//look for start of stream and set inStream true
if ((startStreamFound) && (hasValues || currentByte != 13 && currentByte != 10)) { //avoid trailing CR/LF
array.write(currentByte);
hasValues = true;
realPos++;
}
if (startStreamCount < 6 && currentByte == ObjectReader.startStream[startStreamCount]) {
startStreamCount++;
if (startStreamCount == 6) { //stream start found so log
startStreamFound = true;
}
} else {
startStreamCount = 0;
}
if (realPos >= size) {
break;
}
//if(!inStream){
readData[ptr] = currentByte;
ptr++;
if (ptr == maxPtr) {
if (maxPtr < XXX) {
maxPtr *= 2;
} else {
maxPtr += 100000;
}
final byte[] tmpArray = new byte[maxPtr];
System.arraycopy(readData, 0, tmpArray, 0, readData.length);
readData = tmpArray;
}
}
} catch (final Exception e) {
LogWriter.writeLog("Exception " + e + " reading object");
}
if (array != null) {
array.flush();
array.close();
}
}
public void spoolStreamDataToDisk(final File tmpFile, long start) throws Exception {
movePointer(start);
boolean hasValues = false;
// Create output file
final BufferedOutputStream array = new BufferedOutputStream(new FileOutputStream(tmpFile));
int bufSize = -1;
//PdfObject pdfObject=null;
int startStreamCount = 0; //newCacheSize=-1,;
boolean startStreamFound = false;
//if(pdfObject!=null) //only use if values found
//newCacheSize=this.newCacheSize;
final int XXX = 2 * 1024 * 1024;
final int rawSize = bufSize;
int realPos = 0;
final boolean debug = false;
boolean lengthSet = false; //start false and set to true if we find /Length in metadata
// final boolean streamFound=false;
if (debug) {
System.out.println("=============================");
}
if (bufSize < 1) {
bufSize = 128;
}
// if(newCacheSize!=-1 && bufSize>newCacheSize)
//bufSize=newCacheSize;
//array for data
int ptr = 0, maxPtr = bufSize;
byte[] readData = new byte[maxPtr];
int charReached = 0, charReached2, charReached3 = 0;
byte[] buffer = null;
//final boolean inStream=false;
boolean ignoreByte;
/* adjust buffer if less than bytes left in file*/
long pointer;
/* read the object or block*/
try {
byte currentByte; //lastByte;
int i = bufSize - 1, offset = -bufSize;
//int blocksRead=0; //lastEnd=-1,lastComment=-1;
while (true) {
i++;
if (i == bufSize) {
//cache data and update counter
// if(blocksRead==1){
// dataRead=buffer;
// }else if(blocksRead>1){
//
// int bytesRead=dataRead.length;
// int newBytes=buffer.length;
// byte[] tmp=new byte[bytesRead+newBytes];
//
// //existing data into new array
// System.arraycopy(dataRead, 0, tmp, 0, bytesRead);
//
// //data from current block
// System.arraycopy(buffer, 0, tmp, bytesRead, newBytes);
//
// dataRead=tmp;
//
// //PUT BACK to switch on caching
// if(1==2 && streamFound && dataRead.length>newCacheSize) //stop if over max size
// break;
// }
// blocksRead++;
/*
* read the next block
*/
pointer = getPointer();
if (start == -1) {
start = pointer;
}
/* adjust buffer if less than bytes left in file*/
if (pointer + bufSize > eof) {
bufSize = (int) (eof - pointer);
}
bufSize += 6;
buffer = new byte[bufSize];
pdf_datafile.read(buffer);
offset += i;
i = 0;
}
/* write out and look for endobj at end*/
currentByte = buffer[i];
ignoreByte = false;
/*check for endobj at end - reset if not*/
if (currentByte == ObjectDecoder.endPattern[charReached])// && !inStream)
{
charReached++;
} else {
charReached = 0;
}
charReached2 = 0;
//look for start of stream and set inStream true
if ((startStreamFound) && (hasValues || currentByte != 13 && currentByte != 10)) { //avoid trailing CR/LF
array.write(currentByte);
hasValues = true;
}
if (startStreamCount < 6 && currentByte == ObjectReader.startStream[startStreamCount]) {
startStreamCount++;
} else {
startStreamCount = 0;
}
if (!startStreamFound && startStreamCount == 6) { //stream start found so log
//startStreamCount=offsetRef+startStreamCount;
startStreamFound = true;
}
/*if length not set we go on endstream in data*/
if (!lengthSet) {
//also scan for /Length if it had a valid size
if ((rawSize != -1) && (currentByte == ObjectReader.lengthString[charReached3])) { // && !inStream){
charReached3++;
if (charReached3 == 6) {
lengthSet = true;
}
} else {
charReached3 = 0;
}
}
if (charReached == 6 || charReached2 == 4) {
if (!lengthSet) {
break;
}
charReached = 0;
}
if (lengthSet && realPos >= rawSize) {
break;
}
if (!ignoreByte) { //|| !inStream)
readData[ptr] = currentByte;
ptr++;
if (ptr == maxPtr) {
if (maxPtr < XXX) {
maxPtr *= 2;
} else {
maxPtr += 100000;
}
final byte[] tmpArray = new byte[maxPtr];
System.arraycopy(readData, 0, tmpArray, 0, readData.length);
readData = tmpArray;
}
}
realPos++;
}
} catch (final Exception e) {
LogWriter.writeLog("Exception " + e + " reading object");
}
if (array != null) {
array.flush();
array.close();
}
}
void closeFile() throws IOException {
if (pdf_datafile != null) {
pdf_datafile.close();
pdf_datafile = null;
}
}
public long getOffset(final int currentID) {
return offset.elementAt(currentID);
}
public byte[] getBytes(final long start, final int count) {
final byte[] buffer = new byte[count];
movePointer(start);
try {
pdf_datafile.read(buffer); //get next chars
} catch (final IOException e) {
LogWriter.writeLog("Exception: " + e.getMessage());
}
return buffer;
}
public void storeLinearizedTables(final LinearizedHintTable linHintTable) {
this.linHintTable = linHintTable;
}
public void dispose() {
if (decryption != null) {
decryption.flush();
decryption.dispose();
}
if (decryption != null) {
decryption.setCipherNull();
}
decryption = null;
this.compressedObj = null;
//any linearized data
if (linHintTable != null) {
linHintTable = null;
}
offset = null;
try {
if (pdf_datafile != null) {
pdf_datafile.close();
}
} catch (final IOException e) {
LogWriter.writeLog("Exception: " + e.getMessage());
}
pdf_datafile = null;
if (offset != null) {
offset.dispose();
}
}
//////////////////////////////////////////////////////////////////////
/**
* get pdf type in file (found at start of file)
*/
public final String getType() {
String pdf_type = "";
try {
movePointer(0);
pdf_type = pdf_datafile.readLine();
//strip off anything before
final int pos = pdf_type.indexOf("%PDF");
if (pos != -1) {
pdf_type = pdf_type.substring(pos + 5);
}
} catch (final Exception e) {
LogWriter.writeLog("Exception " + e + " in reading type");
}
return pdf_type;
}
//////////////////////////////////////////////////////////////////////////
/**
* returns current location pointer and sets to new value
*/
public void movePointer(final long pointer) {
try {
//make sure inside file
if (pointer > pdf_datafile.length()) {
LogWriter.writeLog("Attempting to access ref outside file");
} else {
pdf_datafile.seek(pointer);
}
} catch (final Exception e) {
LogWriter.writeLog("Exception " + e + " moving pointer to " + pointer + " in file.");
}
}
//////////////////////////////////////////////////
/**
* gets pointer to current location in the file
*/
private long getPointer() {
long old_pointer = 0;
try {
old_pointer = pdf_datafile.getFilePointer();
} catch (final Exception e) {
LogWriter.writeLog("Exception " + e + " getting pointer in file");
}
return old_pointer;
}
/**
* general routine to turn reference into id with object name
*/
@SuppressWarnings("UnusedParameters")
public final boolean isCompressed(final int ref, final int gen) {
return offset.isCompressed(ref);
}
public DecryptionFactory getDecryptionObject() {
return decryption;
}
public void setPassword(final String password) {
this.encryptionPassword = password.getBytes();
//reset
if (decryption != null) {
decryption.reset(encryptionPassword);
}
}
/**
* read an object in the pdf into a Object which can be an indirect or an object
*/
public byte[] readObjectData(final PdfObject pdfObject) {
final String objectRef = pdfObject.getObjectRefAsString();
final int id = pdfObject.getObjectRefID();
//read the Dictionary data
if (pdfObject.isDataExternal()) {
//byte[] data=readObjectAsByteArray(pdfObject, objectRef, isCompressed(number,generation),number,generation);
final byte[] data = readObjectAsByteArray(pdfObject, false, id, 0);
//allow for data in Linear object not yet loaded
if (data == null) {
pdfObject.setFullyResolved(false);
LogWriter.writeLog("[Linearized] " + pdfObject.getObjectRefAsString() + " not yet available (15)");
return data;
}
}
final boolean debug = false;
if (debug) {
System.err.println("reading objectRef=" + objectRef + "< isCompressed=" + offset.isCompressed(id));
}
final boolean isCompressed = offset.isCompressed(id);
pdfObject.setCompressedStream(isCompressed);
//any stream
final byte[] raw;
/*read raw object data*/
if (isCompressed) {
raw = readCompressedObjectData(pdfObject, offset);
} else {
movePointer(offset.elementAt(id));
if (objectRef.charAt(0) == '<') {
raw = objectReader.readObjectData(-1, pdfObject);
} else {
if (ObjLengthTable == null || offset.isRefTableInvalid()) { //isEncryptionObject
//allow for bum object
if (getPointer() == 0) {
raw = new byte[0];
} else {
raw = objectReader.readObjectData(-1, pdfObject);
}
} else if (id > ObjLengthTable.length || ObjLengthTable[id] == 0) {
LogWriter.writeLog(objectRef + " cannot have offset 0");
raw = new byte[0];
} else {
raw = objectReader.readObjectData(ObjLengthTable[id], pdfObject);
}
}
}
return raw;
}
private byte[] readCompressedObjectData(final PdfObject pdfObject, final Offsets offset) {
byte[] raw;
final int objectID = pdfObject.getObjectRefID();
final int compressedID = offset.elementAt(objectID);
String startID = null;
int First = lastFirst;
boolean isCached = true; //assume cached
//see if we already have values
byte[] compressedStream = lastCompressedStream;
Map offsetStart = lastOffsetStart;
Map offsetEnd = lastOffsetEnd;
PdfObject Extends = null;
if (lastOffsetStart != null) {
startID = lastOffsetStart.get(String.valueOf(objectID));
}
//read 1 or more streams
while (startID == null) {
if (Extends != null) {
compressedObj = Extends;
} else if (compressedID != lastCompressedID) {
isCached = false;
movePointer(offset.elementAt(compressedID));
raw = objectReader.readObjectData(ObjLengthTable[compressedID], null);
compressedObj = new CompressedObject(compressedID, 0);
final ObjectDecoder objDecoder = new ObjectDecoder(this);
objDecoder.readDictionaryAsObject(compressedObj, 0, raw);
}
/* get offsets table see if in this stream*/
offsetStart = new HashMap();
offsetEnd = new HashMap();
First = compressedObj.getInt(PdfDictionary.First);
compressedStream = compressedObj.getDecodedStream();
CompressedObjects.extractCompressedObjectOffset(offsetStart, offsetEnd, First, compressedStream, compressedID, offset);
startID = offsetStart.get(String.valueOf(objectID));
Extends = compressedObj.getDictionary(PdfDictionary.Extends);
if (Extends == null) {
break;
}
}
if (!isCached) {
lastCompressedStream = compressedStream;
lastCompressedID = compressedID;
lastOffsetStart = offsetStart;
lastOffsetEnd = offsetEnd;
lastFirst = First;
}
/*put bytes in stream*/
final int start = First + Integer.parseInt(startID);
int end = compressedStream.length;
final String endID = offsetEnd.get(String.valueOf(objectID));
if (endID != null) {
end = First + Integer.parseInt(endID);
}
final int streamLength = end - start;
raw = new byte[streamLength];
System.arraycopy(compressedStream, start, raw, 0, streamLength);
pdfObject.setInCompressedStream(true);
return raw;
}
/**
* get object as byte[]
*
* @param isCompressed
* @param objectID
* @param gen
* @return
*/
public byte[] readObjectAsByteArray(final PdfObject pdfObject, final boolean isCompressed, final int objectID, final int gen) {
byte[] raw = null;
//data not in PDF stream
//if(pdfObject.isDataExternal()){
if (linHintTable != null) {
raw = linHintTable.getObjData(objectID);
}
if (raw == null) {
/* read raw object data*/
if (isCompressed) {
raw = readCompressedObjectAsByteArray(pdfObject, objectID, gen);
} else {
final long objectOffset = offset.elementAt(objectID);
if (objectOffset == 0) { //error or not yet loaded
return null;
}
movePointer(objectOffset);
if (ObjLengthTable == null || offset.isRefTableInvalid()) {
raw = objectReader.readObjectData(-1, pdfObject);
} else if (objectID > ObjLengthTable.length) {
return null;
} else {
raw = objectReader.readObjectData(ObjLengthTable[objectID], pdfObject);
}
}
}
//check first 10 bytes
int j = 0;
if (raw.length > 15) {
for (int i2 = 0; i2 < 10; i2++) {
if (raw[i2] == 'o' && raw[i2 + 1] == 'b' && raw[i2 + 2] == 'j') { //okay of we hit obj firat
break;
} else if (raw[i2] == 'e' && raw[i2 + 1] == 'n' && raw[i2 + 2] == 'd' && raw[i2 + 3] == 'o' && raw[i2 + 4] == 'b' && raw[i2 + 5] == 'j') {
j = i2 + 6;
objectReader.fileIsBroken = true;
break;
}
}
}
while (raw[j] == 10 || raw[j] == 12 || raw[j] == 32) {
j++;
}
if (j > 0) { //adjust to remove stuff at start
final byte[] oldBuffer = raw;
final int newLength = raw.length - j;
raw = new byte[newLength];
System.arraycopy(oldBuffer, j, raw, 0, newLength);
}
return raw;
}
private byte[] readCompressedObjectAsByteArray(final PdfObject pdfObject, final int objectID, final int gen) {
byte[] raw;
int compressedID = offset.elementAt(objectID);
String startID = null, compressedRef;
Map offsetStart = lastOffsetStart;
Map offsetEnd = lastOffsetEnd;
int First = lastFirst;
byte[] compressedStream;
boolean isCached = true; //assume cached
PdfObject compressedObj, Extends;
//see if we already have values
compressedStream = lastCompressedStream;
if (lastOffsetStart != null) {
startID = lastOffsetStart.get(String.valueOf(objectID));
}
int lastCompressedID = -1;
//read 1 or more streams
while (startID == null) {
isCached = false;
if (lastCompressedID == compressedID) {
throw new RuntimeException("Compressed Object stream corrupted - PDF file broken");
}
try {
pdf_datafile.seek(offset.elementAt(compressedID));
} catch (final IOException e) {
LogWriter.writeLog("Exception " + e + " moving pointer in file.");
}
lastCompressedID = compressedID;
raw = objectReader.readObjectData(ObjLengthTable[compressedID], null);
//may need to use compObj and not objectRef
final String compref = compressedID + " " + gen + " R";
compressedObj = new CompressedObject(compref);
final ObjectDecoder objDecoder = new ObjectDecoder(this);
objDecoder.readDictionaryAsObject(compressedObj, 0, raw);
/* get offsets table see if in this stream*/
offsetStart = new HashMap();
offsetEnd = new HashMap();
First = compressedObj.getInt(PdfDictionary.First);
//do later due to code above
compressedStream = compressedObj.getDecodedStream();
CompressedObjects.extractCompressedObjectOffset(offsetStart, offsetEnd, First, compressedStream, compressedID, offset);
startID = offsetStart.get(String.valueOf(objectID));
Extends = compressedObj.getDictionary(PdfDictionary.Extends);
if (Extends == null) {
compressedRef = null;
} else {
compressedRef = Extends.getObjectRefAsString();
}
if (compressedRef != null) {
compressedID = Integer.parseInt(compressedRef.substring(0, compressedRef.indexOf(' ')));
}
}
if (!isCached) {
lastCompressedStream = compressedStream;
lastOffsetStart = offsetStart;
lastOffsetEnd = offsetEnd;
lastFirst = First;
}
/* put bytes in stream*/
final int start = First + Integer.parseInt(startID);
int end = compressedStream.length;
final String endID = offsetEnd.get(String.valueOf(objectID));
if (endID != null) {
end = First + Integer.parseInt(endID);
}
final int streamLength = end - start;
raw = new byte[streamLength];
System.arraycopy(compressedStream, start, raw, 0, streamLength);
pdfObject.setInCompressedStream(true);
return raw;
}
///////////////////////////////////////////////////////////////////
/**
* get postscript data (which may be split across several objects)
*/
public byte[] readPageIntoStream(final PdfObject pdfObject) {
final byte[][] pageContents = pdfObject.getKeyArray(PdfDictionary.Contents);
//reset buffer object
byte[] binary_data = new byte[0];
//exit on empty
if (pageContents == null || (pageContents != null && pageContents.length > 0 && pageContents[0] == null)) {
return binary_data;
}
if (pageContents != null) {
final int count = pageContents.length;
byte[] decoded_stream_data;
PdfObject streamData;
//read all objects for page into stream
for (int ii = 0; ii < count; ii++) {
streamData = new StreamObject(new String(pageContents[ii]));
streamData.isDataExternal(pdfObject.isDataExternal()); //flag if being read from external stream
readObject(streamData);
decoded_stream_data = streamData.getDecodedStream();
//System.out.println(decoded_stream_data+" "+OLDdecoded_stream_data);
if (ii == 0 && decoded_stream_data != null) {
binary_data = decoded_stream_data;
} else {
binary_data = appendData(binary_data, decoded_stream_data);
}
}
}
return binary_data;
}
/**
* append into data_buffer by copying processed_data then
* binary_data into temp and then temp back into binary_data
*
* @param binary_data
* @param decoded_stream_data
*/
static byte[] appendData(byte[] binary_data, final byte[] decoded_stream_data) {
if (decoded_stream_data != null) {
final int current_length = binary_data.length + 1;
//find end of our data which we decompressed.
int processed_length = decoded_stream_data.length;
if (processed_length > 0) { //trap error
while (decoded_stream_data[processed_length - 1] == 0) {
processed_length--;
}
//put current into temp so I can resize array
final byte[] temp = new byte[current_length];
System.arraycopy(binary_data, 0, temp, 0, current_length - 1);
//add a space between streams
temp[current_length - 1] = ' ';
//resize
binary_data = new byte[current_length + processed_length];
//put original data back
System.arraycopy(temp, 0, binary_data, 0, current_length);
//and add in new data
System.arraycopy(decoded_stream_data, 0, binary_data, current_length, processed_length);
}
}
return binary_data;
}
public void setCertificate(final Certificate certificate, final PrivateKey key) {
this.certificate = certificate;
this.key = key;
}
/**
* read reference table start to see if new 1.5 type or traditional xref
*
* @throws PdfException
*/
public final PdfObject readReferenceTable(final PdfObject linearObj, final PdfFileReader pdfFileReader) throws PdfException {
final PdfObject rootObj = refTable.readReferenceTable(linearObj, this, objectReader);
final PdfObject encryptObj = refTable.getEncryptionObject();
if (encryptObj != null) {
setupDecryption(encryptObj, pdfFileReader);
}
//will be null if offset table invalid
ObjLengthTable = offset.calculateObjectLength((int) eof);
return rootObj;
}
public void setupDecryption(final PdfObject encryptObj, final PdfFileReader pdfFileReader) throws PdfSecurityException {
try {
final byte[] ID = refTable.getID();
if (certificate != null) {
decryption = new DecryptionFactory(ID, certificate, key);
} else {
decryption = new DecryptionFactory(ID, encryptionPassword);
}
//get values
if (encyptionObj == null) {
encyptionObj = new EncryptionObject(new String(encryptObj.getUnresolvedData()));
readObject(encyptionObj);
}
decryption.readEncryptionObject(encyptionObj, pdfFileReader);
} catch (final Error err) {
LogWriter.writeLog("No Bouncy castle on classpath " + err);
throw new RuntimeException("This PDF file is encrypted and JPedal needs an additional library to \n" +
"decode on the classpath (we recommend bouncycastle library).\n" +
"There is additional explanation at http://www.idrsolutions.com/additional-jars" + '\n');
}
}
public byte[] getEncHash() {
return encryptionPassword;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy