Many resources are needed to download a project. Please understand that we have to cover our server costs. Thank you in advance. Project price: only $1.
You can buy this project and download/modify it as often as you want.
/*
* ===========================================
* Java Pdf Extraction Decoding Access Library
* ===========================================
*
* Project Info: http://www.idrsolutions.com
* Help section for developers at http://www.idrsolutions.com/support/
*
* (C) Copyright 1997-2017 IDRsolutions and Contributors.
*
* This file is part of JPedal/JPDF2HTML5
*
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* ---------------
* RefTable.java
* ---------------
*/
package org.jpedal.io.types;
import java.io.IOException;
import org.jpedal.exception.PdfException;
import org.jpedal.io.ObjectDecoder;
import org.jpedal.io.PdfFileReader;
import org.jpedal.io.RandomAccessBuffer;
import org.jpedal.objects.raw.CompressedObject;
import org.jpedal.objects.raw.PageObject;
import org.jpedal.objects.raw.PdfDictionary;
import org.jpedal.objects.raw.PdfObject;
import org.jpedal.utils.LogWriter;
import org.jpedal.utils.NumberUtils;
/**
* Locates and reads the cross-reference data of a PDF file.
*
* The start of the reference data is found either from the startxref value near the end of
* the file or, for a linearized file, by scanning the in-memory buffer. The data is then read
* as a legacy xref table or as a compressed xref stream. If that fails, the offsets are
* rebuilt through BrokenRefTable. The Encrypt dictionary, the first file ID and the Info
* dictionary met on the way are kept and exposed through getters.
*/
public class RefTable {
/**
* Encrypt dictionary taken from the first trailer processed (or from the root dictionary
* found while manually scanning a broken file); stays null when none was seen
*/
PdfObject encryptObj;
/**
* holds file ID - first entry of the trailer ID array, only stored when an Encrypt
* dictionary is present and no ID has been stored yet
*/
private byte[] ID;
/**
* pattern to look for in objects - getObjectName() stops copying once these
* three characters have been matched
*/
static final String pattern = "obj";
/**
* info object - Info dictionary taken from the trailer
*/
private PdfObject infoObject;
//states used by isCompressedStream() while it works out the kind of reference data
static final int UNSET = -1;
static final int COMPRESSED = 1;
static final int LEGACY = 2;
//source of the file bytes; set to null by closeFile()
private RandomAccessBuffer pdf_datafile;
//keyword that starts a legacy table; isCompressedStream() accepts a match of its first three bytes
static final byte[] oldPattern = {'x', 'r', 'e', 'f'};
//value supplied to the constructor and used as the end-of-file position when scanning
private final long eof;
//store supplied to the constructor; receives the xref positions and object offsets read here
final Offsets offset;
/**
 * Creates a reader over the supplied file data.
 *
 * @param pdf_datafile source of the file bytes
 * @param eof          position used as the end of the file when scanning
 * @param offset       store that receives the xref positions and object offsets
 */
public RefTable(final RandomAccessBuffer pdf_datafile, final long eof, final Offsets offset) {
    this.offset = offset;
    this.eof = eof;
    this.pdf_datafile = pdf_datafile;
}
/**
 * Reads the value that follows the last {@code startxref} keyword near the end of the file.
 * The misspelling {@code startref} is accepted as well.
 *
 * The end of the file is first scanned backwards in 255 byte steps for the bytes
 * {@code %%EO}; the position reached decides which block of up to 1024 bytes (or the whole
 * file, when it is smaller) is searched backwards for the keyword.
 *
 * @return the number found after the keyword
 * @throws PdfException if the end of the file cannot be read, or if no keyword or no usable
 *                      number is found (in the last two cases the file is closed first)
 */
private int readFirstStartRef() throws PdfException {
    //reset flag
    offset.setRefTableInvalid(false);

    int pointer = -1;
    int i = 1019;
    final StringBuilder startRef = new StringBuilder(10);

    final int block = 1024;
    byte[] lastBytes = new byte[block];
    long end;

    //"%%EO" - matched back to front, so the entry at index 3 is expected first
    final int[] EndOfFileMarker = {37, 37, 69, 79};
    int valReached = 3;
    boolean EOFFound = false;

    try {
        end = eof;

        final int bufSize = 255;
        while (true) {
            //a negative start makes getBytes() return an all-zero buffer
            final byte[] buffer = getBytes(end - bufSize, bufSize);

            //number of bytes examined in this buffer (was a local that shadowed the offset field)
            int scanned = 0;
            for (int ii = bufSize - 1; ii > -1; ii--) {
                //restart the marker check unless the previous byte continued a match
                if (!EOFFound) {
                    valReached = 3;
                }
                if (buffer[ii] == EndOfFileMarker[valReached]) {
                    valReached--;
                    EOFFound = true;
                } else {
                    EOFFound = false;
                }
                scanned++;
                if (valReached < 0) {
                    break;
                }
            }

            if (valReached < 0) {
                //NOTE(review): this moves end forward by the bytes scanned instead of back to
                //the marker. Kept unchanged because the window read below depends on it, and a
                //startxref written after the last marker is still found this way - confirm intent.
                end += scanned;
                break;
            }
            end -= bufSize;

            //allow for no eof
            if (end < 0) {
                end = eof;
                break;
            }
        }

        //allow for very small file
        int count = (int) (end - block);
        if (count < 0) {
            count = 0;
            final int size = (int) eof;
            lastBytes = new byte[size];
            i = size + 3; //force reset below
        }
        lastBytes = getBytes(count, lastBytes.length);
    } catch (final Exception e) {
        LogWriter.writeLog("Exception " + e + " reading last 1024 bytes");
        throw new PdfException(e + " reading last 1024 bytes");
    }

    //look for tref as end of startxref
    final int fileSize = lastBytes.length;
    if (i > fileSize) {
        i = fileSize - 5; //last index at which a 5 byte match still fits
    }
    while (i > -1) {
        //first check is because startref works as well a startxref !!
        if (((lastBytes[i] == 116 && lastBytes[i + 1] == 120) || (lastBytes[i] == 114 && lastBytes[i + 1] == 116))
                && (lastBytes[i + 2] == 114)
                && (lastBytes[i + 3] == 101)
                && (lastBytes[i + 4] == 102)) {
            break;
        }
        i--;
    }

    /*trap buggy files*/
    //i can start below -1 when fewer than 4 bytes were read, so test for any negative value
    if (i < 0) {
        try {
            closeFile();
        } catch (final IOException e1) {
            LogWriter.writeLog("Exception " + e1 + " closing file");
        }
        throw new PdfException("No Startxref found in last 1024 bytes ");
    }

    i += 5; //allow for word length

    //move to start of value ignoring spaces or returns
    //(bounded by the real buffer length, which is below 1024 for small files)
    while (i < fileSize && (lastBytes[i] == 10 || lastBytes[i] == 32 || lastBytes[i] == 13)) {
        i++;
    }

    //collect the value up to the next space or return
    while (i < fileSize
            && lastBytes[i] != 10
            && lastBytes[i] != 32
            && lastBytes[i] != 13) {
        startRef.append((char) lastBytes[i]);
        i++;
    }

    /*convert xref to string to get pointer*/
    if (startRef.length() > 0) {
        try {
            pointer = Integer.parseInt(startRef.toString());
        } catch (final NumberFormatException e) {
            //leave pointer at -1 so the checked exception below is raised instead
            LogWriter.writeLog("Exception " + e + " parsing startxref value");
        }
    }

    if (pointer == -1) {
        LogWriter.writeLog("No Startref found in last 1024 bytes ");
        try {
            closeFile();
        } catch (final IOException e1) {
            LogWriter.writeLog("Exception " + e1 + " closing file");
        }
        throw new PdfException("No Startref found in last 1024 bytes ");
    }
    return pointer;
}
/**
 * Finds the start of the reference data, works out whether it is a compressed (1.5 style)
 * stream or a traditional xref table, and reads it.
 *
 * If the position is unusable or reading throws, the reference table is flagged as invalid
 * on {@code offset} and the offsets are rebuilt through {@code BrokenRefTable}.
 *
 * @param linearObj      when not null, the reference data is located by scanning the buffer
 *                       returned by {@code pdf_datafile.getPdfBuffer()} rather than by
 *                       reading the startxref value at the end of the file
 * @param currentPdfFile reader used to decode dictionaries and objects
 * @param objectReader   reader used to fetch the raw data of a compressed xref stream
 * @return the root object
 * @throws PdfException if no startxref value can be found, or if the manual scan fails
 *                      with an {@code Error}
 */
public final PdfObject readReferenceTable(final PdfObject linearObj, final PdfFileReader currentPdfFile, final ObjectReader objectReader) throws PdfException {
    int pointer = -1;
    final int eof = (int) this.eof;
    boolean islinearizedCompressed = false;

    if (linearObj == null) {
        pointer = readFirstStartRef();
    } else { //find at start of Linearized
        final byte[] data = pdf_datafile.getPdfBuffer();
        final int count = data.length;
        int ptr = 5;

        //both keywords are 4 bytes long, so stop while 4 bytes are still available
        for (int i = 0; i + 3 < count; i++) {
            //track start of this object (needed for compressed)
            if (i + 5 < count
                    && data[i] == 'e' && data[i + 1] == 'n' && data[i + 2] == 'd'
                    && data[i + 3] == 'o' && data[i + 4] == 'b' && data[i + 5] == 'j') {
                ptr = i + 6;
            }

            if (data[i] == 'x' && data[i + 1] == 'r' && data[i + 2] == 'e' && data[i + 3] == 'f') {
                pointer = i;
                break;
            } else if (data[i] == 'X' && data[i + 1] == 'R' && data[i + 2] == 'e' && data[i + 3] == 'f') {
                islinearizedCompressed = true;
                //the stream object starts after the previous endobj, ignoring spaces and returns
                pointer = ptr;
                while (pointer < count && (data[pointer] == 10 || data[pointer] == 13 || data[pointer] == 32)) {
                    pointer++;
                }
                break;
            }
        }
    }

    offset.addXref(pointer);

    PdfObject rootObj = null;
    boolean isInvalid = true;

    if (pointer >= eof || pointer == 0) {
        LogWriter.writeLog("Pointer not if file - trying to manually find startref");
    } else {
        try {
            if (islinearizedCompressed || isCompressedStream(pointer, eof)) {
                rootObj = readCompressedStream(rootObj, pointer, currentPdfFile, objectReader, linearObj);
            } else {
                rootObj = readLegacyReferenceTable(rootObj, pointer, eof, currentPdfFile);
            }
            isInvalid = false;
        } catch (final Exception e) {
            LogWriter.writeLog("[PDF] Exception reading reg table " + e + " - trying to manually find startref");
        }
    }

    if (isInvalid) {
        offset.setRefTableInvalid(true);
        try {
            rootObj = new PageObject(BrokenRefTable.findOffsets(pdf_datafile, offset));

            final byte[] rootDictBytes = BrokenRefTable.findFirstRootDict(pdf_datafile); //fix for 28184
            if (rootDictBytes != null) {
                final PdfObject pdfObject = new CompressedObject("0 0 R");
                Dictionary.readDictionary(pdfObject, 0, rootDictBytes, -1, currentPdfFile);

                encryptObj = pdfObject.getDictionary(PdfDictionary.Encrypt);
                if (encryptObj != null) {
                    final byte[][] IDs = pdfObject.getStringArray(PdfDictionary.ID);
                    if (IDs != null && this.ID == null) {
                        // only the first encountered ID should be used as a fileID for decryption
                        this.ID = IDs[0];
                    }
                }
                infoObject = pdfObject.getDictionary(PdfDictionary.Info);
            }
        } catch (final Error err) {
            throw new PdfException(err.getMessage() + " attempting to manually scan file for objects");
        }
        currentPdfFile.readObject(rootObj);
    }
    return rootObj;
}
/**
* read reference table from file so we can locate
* objects in pdf file and read the trailers
*
* Starting at pointer, each pass of the loop reads one block through Trailer.readTrailer,
* finds the trailer keyword, decodes the trailer dictionary, reads the xref entries into
* offset and then moves on to the position given by the trailer's Prev entry. The loop ends
* when there is no further usable position.
*
* @param rootObj root object found so far (may be null)
* @param pointer position at which the first table is expected
* @param eof end-of-file position handed on to Trailer.readTrailer and offset.readXRefs
* @param currentPdfFile reader used to decode dictionaries and objects
* @return the root object, rebuilt from a manual scan when none could be read
* @throws PdfException if reading a trailer block throws (the file is closed first)
*/
private PdfObject readLegacyReferenceTable(PdfObject rootObj, int pointer, final int eof, final PdfFileReader currentPdfFile) throws PdfException {
int endTable, current = 0; //current object number
byte[] Bytes;
int bufSize = 1024;
/*read and decode 1 or more trailers*/
while (true) {
try {
//allow for pointer outside file
Bytes = Trailer.readTrailer(bufSize, pointer, eof, pdf_datafile);
} catch (final Exception e) {
//close the file (logging any failure to do so) before reporting the read error
try {
closeFile();
} catch (final IOException e1) {
LogWriter.writeLog("Exception " + e + " closing file " + e1);
}
throw new PdfException("Exception " + e + " reading trailer");
}
if (Bytes == null) { //safety catch
break;
}
//get trailer
int i = 0;
final int maxLen = Bytes.length;
boolean trailerNotFound = true;
//for(int a=0;a<100;a++)
// System.out.println((char)Bytes[i+a]);
while (i < maxLen - 7) { //look for trailer keyword
//byte values spell "trailer"
if (Bytes[i] == 116 && Bytes[i + 1] == 114 && Bytes[i + 2] == 97 && Bytes[i + 3] == 105 &&
Bytes[i + 4] == 108 && Bytes[i + 5] == 101 && Bytes[i + 6] == 114) {
trailerNotFound = false;
break;
}
i++;
}
//save endtable position for later
endTable = i;
//no trailer in this block - leave the loop; a null rootObj is then rebuilt below
if (i == Bytes.length || trailerNotFound) {
break;
}
//move to beyond <<
//(the loop stops once the current or the previous byte is '<'; the increment that
//follows then steps one byte further)
while (Bytes[i] != 60 && Bytes[i - 1] != 60) {
i++;
}
i++;
//decode the trailer dictionary into a temporary object
final PdfObject pdfObject = new CompressedObject("0 0 R");
Dictionary.readDictionary(pdfObject, i, Bytes, -1, currentPdfFile);
//move to beyond >>
i = skipToEnd(Bytes, i);
//handle optional XRefStm
//NOTE(review): the value assigned to pointer here is only tested against -1; every branch
//below overwrites it with Prev or -1, so XRefStm itself is never followed - confirm intent
final int XRefStm = pdfObject.getInt(PdfDictionary.XRefStm);
if (XRefStm != -1) {
pointer = XRefStm;
} else { //usual way
pointer = getPointer(pointer, Bytes, i, maxLen);
}
//go back to the start of the block (presumably skipping leading whitespace - helper not shown)
i = StreamReaderUtils.skipSpaces(Bytes, 0);
if (pointer == -1) {
LogWriter.writeLog("No startRef");
/*now read the objects for the trailers*/
} else if (Bytes[i] == 120 && Bytes[i + 1] == 114 && Bytes[i + 2] == 101 && Bytes[i + 3] == 102) { //make sure starts xref
//continue from index 5 of the block, i.e. after the keyword when it starts at index 0
i = StreamReaderUtils.skipSpaces(Bytes, 5);
current = offset.readXRefs(current, Bytes, endTable, i, eof, pdf_datafile);
/*now process trailer values - only first set of table values for root, encryption and info*/
rootObj = processTrailer(rootObj, pdfObject);
//make sure first values used if several tables and code for prev
pointer = pdfObject.getInt(PdfDictionary.Prev);
//see if other trailers
if (pointer != -1 && pointer < this.eof) {
//reset values for loop
bufSize = 1024;
//track ref table so we can work out object length
offset.addXref(pointer);
} else { //reset if fails second test above
pointer = -1;
}
} else {
//block does not start with xref - stop following tables and rebuild the offsets by scanning
pointer = -1;
//needs to be read to pick up potential /Pages value
//noinspection ObjectAllocationInLoop
rootObj = new PageObject(BrokenRefTable.findOffsets(pdf_datafile, offset));
currentPdfFile.readObject(rootObj);
offset.setRefTableInvalid(true);
}
if (pointer == -1) {
break;
}
}
if (encryptObj == null && rootObj != null) { //manual check for broken file (ignore if Encrypted)
//returns null when the supposed root turns out to be a Font object
rootObj = handleBrokenFile(rootObj, currentPdfFile);
}
//something gone wrong so manually index
if (rootObj == null) { //see 21382
//discard whatever was collected so far before rebuilding
offset.clear();
offset.reuse();
//needs to be read to pick up potential /Pages value
//noinspection ObjectAllocationInLoop
rootObj = new PageObject(BrokenRefTable.findOffsets(pdf_datafile, offset));
currentPdfFile.readObject(rootObj);
offset.setRefTableInvalid(true);
}
return rootObj;
}
/**
 * Checks that the object picked as root is not actually a Font, which indicates a
 * reference table in the wrong order (see 21153).
 *
 * The object is resolved to read its Type. If that throws, the status and unresolved data
 * it had on entry are put back and the failure is only logged.
 *
 * @param rootObj        candidate root object
 * @param currentPdfFile reader used to resolve the object
 * @return {@code null} when the Type is Font, so that the caller re-indexes the file;
 *         otherwise {@code rootObj}
 */
private static PdfObject handleBrokenFile(PdfObject rootObj, final PdfFileReader currentPdfFile) {
    final int savedStatus = rootObj.getStatus();
    final byte[] savedData = rootObj.getUnresolvedData();

    int rootType;
    try {
        new ObjectDecoder(currentPdfFile).checkResolved(rootObj);
        rootType = rootObj.getParameterConstant(PdfDictionary.Type);
    } catch (final Exception e) { //we need to ignore so just catch, put back as was and log
        rootObj.setStatus(savedStatus);
        rootObj.setUnresolvedData(savedData, savedStatus);
        LogWriter.writeLog("[PDF] Exception reading type on root object " + e);
        rootType = -1;
    }

    //a Font here means something has gone wrong; null makes the caller index manually
    return rootType == PdfDictionary.Font ? null : rootObj;
}
/**
 * Looks for the startxref keyword after a trailer dictionary and parses the number that
 * follows it.
 *
 * All scanning is limited to {@code maxLen} bytes; when the data runs out before a keyword
 * is seen the incoming {@code pointer} is returned unchanged, exactly as when an
 * {@code obj} keyword is met first.
 *
 * @param pointer value returned when no number can be read
 * @param bytes   block containing the trailer
 * @param i       index just after the trailer dictionary
 * @param maxLen  number of usable bytes in {@code bytes}
 * @return the parsed number, or {@code pointer} if none was found
 */
private int getPointer(int pointer, final byte[] bytes, int i, final int maxLen) {
    final int limit = Math.min(maxLen, bytes.length);
    boolean hasRef = true;

    i = StreamReaderUtils.skipSpaces(bytes, i);

    //lose any comment lines
    while (i < limit && bytes[i] == '%') {
        while (i < limit && bytes[i] != 10) {
            i++;
        }
        i++;
    }

    /* fix for /Users/markee/Downloads/oneiderapartnerbrochure_web_1371798737.pdf
    /**/
    //look for xref as end of startref
    //NOTE(review): the scan stops as soon as ANY of the five bytes equals its counterpart in
    //"txref", and the value is then expected 8 bytes further on (for a startxref placed
    //directly after the trailer it stops on its first 't'). Kept as is - confirm before tightening.
    while (true) {
        if (i + 4 >= limit) { //ran out of data without seeing a keyword
            hasRef = false;
            break;
        }
        final boolean noByteMatches = bytes[i] != 116 && bytes[i + 1] != 120
                && bytes[i + 2] != 114 && bytes[i + 3] != 101 && bytes[i + 4] != 102;
        if (!noByteMatches) {
            break;
        }
        if (bytes[i] == 'o' && bytes[i + 1] == 'b' && bytes[i + 2] == 'j') {
            hasRef = false;
            break;
        }
        i++;
    }

    if (hasRef && i + 8 < limit) {
        //move to start of value ignoring spaces or returns
        i = StreamReaderUtils.skipSpaces(bytes, i + 8);
        final int s = i;
        i = StreamReaderUtils.skipToEndOfKey(bytes, i);

        /*convert xref to string to get pointer*/
        if (s != i) {
            pointer = NumberUtils.parseInt(s, i, bytes);
        }
    }
    return pointer;
}
/**
 * Steps over a dictionary, including nested dictionaries and arrays.
 *
 * {@code i} is expected to be the index of the second byte of the opening pair of 60 ('<')
 * bytes. Anything between '[' and ']' is skipped without being inspected. If {@code i} is
 * not at such a position the nesting level never leaves zero and {@code i} is returned at
 * once (or the index of the closing ']' when {@code i} is at a '[').
 *
 * @param bytes data containing the dictionary
 * @param i     index of the second byte of the opening pair
 * @return the index just after the closing pair of 62 ('>') bytes, or {@code bytes.length}
 *         when the data ends before the dictionary is closed
 */
private static int skipToEnd(final byte[] bytes, int i) {
    int level = 0;
    final int length = bytes.length;

    while (i < length) {
        if (i > 0 && bytes[i] == 60 && bytes[i - 1] == 60) {
            level++;
            i++;
        } else if (bytes[i] == '[') {
            //jump to the closing bracket (or the end of the data if there is none)
            i++;
            while (i < length && bytes[i] != ']') {
                i++;
            }
        } else if (i > 0 && bytes[i] == 62 && bytes[i - 1] == 62) {
            level--;
            i++;
        }

        if (level == 0) {
            break;
        }
        i++;
    }

    //i can overshoot by one or two when the data is truncated
    return Math.min(i, length);
}
/**
 * Reads one or more 1.5 style compressed reference streams, following the Prev entries.
 *
 * For a linearized file only the first stream is read. When a Prev entry leads to data that
 * is not a compressed stream, reading carries on in
 * {@link #readLegacyReferenceTable} and its result is returned.
 *
 * @param rootObj        root object found so far (may be null)
 * @param pointer        position of the first stream
 * @param currentPdfFile reader used to decode the stream object
 * @param objectReader   reader used to fetch the raw object data
 * @param linearObj      not null for a linearized file
 * @return the root object
 * @throws PdfException if the data cannot be read
 */
private PdfObject readCompressedStream(PdfObject rootObj, int pointer, final PdfFileReader currentPdfFile, final ObjectReader objectReader, final PdfObject linearObj) throws PdfException {
    final int fileEnd = (int) eof;

    while (pointer != -1) {
        // position the file on the stream object and decode it
        movePointer(pointer);
        final PdfObject xrefStream = readStreamTableData(currentPdfFile, objectReader, offset, pdf_datafile);

        // only the first set of values is kept for root, encryption and info
        rootObj = processTrailer(rootObj, xrefStream);

        // Prev is not followed for linearized files (may need adjusting as more examples turn up)
        if (linearObj != null) {
            break;
        }

        pointer = xrefStream.getInt(PdfDictionary.Prev);

        // a non-compressed object table can follow a compressed one
        if (pointer != -1 && !isCompressedStream(pointer, fileEnd)) {
            return readLegacyReferenceTable(rootObj, pointer, fileEnd, currentPdfFile);
        }
    }
    return rootObj;
}
/**
 * Reads the xref stream object at the current file position and passes its entries to
 * {@code CompressedObjects.readCompressedOffsets}.
 *
 * Without an Index entry a single run starting at object 0 and covering Size entries is
 * read; otherwise Index is taken as (first object, count) pairs which are read in turn.
 *
 * @param currentPdfFile reader used to decode the object and, if needed, its stream
 * @param objectReader   reader that supplies the raw object data
 * @param offset         store that receives the offsets
 * @param pdf_datafile   file data handed on to the offset reader
 * @return the decoded stream object
 * @throws PdfException if the object cannot be read
 */
private static PdfObject readStreamTableData(final PdfFileReader currentPdfFile, final ObjectReader objectReader,
                                             final Offsets offset,
                                             final RandomAccessBuffer pdf_datafile) throws PdfException {
    final byte[] raw = objectReader.readObjectData(-1, null);

    final PdfObject pdfObject = new CompressedObject(getObjectName(raw));
    pdfObject.setCompressedStream(true);
    new ObjectDecoder(currentPdfFile).readDictionaryAsObject(pdfObject, 0, raw);

    //read the field sizes
    final int[] fieldSizes = pdfObject.getIntArray(PdfDictionary.W);

    //read the xrefs stream, decoding it if that has not happened yet
    byte[] xrefs = pdfObject.getDecodedStream();
    if (xrefs == null) {
        xrefs = currentPdfFile.readStream(pdfObject, true, true, false, false, true, null);
    }

    final int[] Index = pdfObject.getIntArray(PdfDictionary.Index);
    if (Index != null) { //pairs of values in Index[] array
        int pntr = 0;
        int pair = 0;
        while (pair < Index.length) {
            pntr = CompressedObjects.readCompressedOffsets(pntr, Index[pair], Index[pair + 1], fieldSizes, xrefs, offset, pdf_datafile);
            pair += 2;
        }
    } else { //single set of values
        CompressedObjects.readCompressedOffsets(0, 0, pdfObject.getInt(PdfDictionary.Size), fieldSizes, xrefs, offset, pdf_datafile);
    }
    return pdfObject;
}
/**
 * Takes Root, Encrypt, ID and Info from a trailer, but only while no root object has been
 * found yet, so that the first trailer processed wins.
 *
 * @param rootObj   root object found so far, or null
 * @param pdfObject decoded trailer (or xref stream) dictionary
 * @return {@code rootObj} if it was already set, otherwise the Root entry of {@code pdfObject}
 */
private PdfObject processTrailer(PdfObject rootObj, final PdfObject pdfObject) {
    if (rootObj != null) {
        return rootObj;
    }

    final PdfObject root = pdfObject.getDictionary(PdfDictionary.Root);

    encryptObj = pdfObject.getDictionary(PdfDictionary.Encrypt);
    if (encryptObj != null) {
        final byte[][] fileIds = pdfObject.getStringArray(PdfDictionary.ID);
        // only the first encountered ID should be used as a fileID for decryption
        if (this.ID == null && fileIds != null) {
            this.ID = fileIds[0];
        }
    }

    infoObject = pdfObject.getDictionary(PdfDictionary.Info);
    return root;
}
/**
 * Builds a reference name from the start of raw object data by copying characters up to the
 * {@code obj} keyword and appending {@code R}.
 *
 * Returns are treated as spaces, a space that follows a space is dropped, and characters
 * that match the keyword at the current match position are not copied.
 *
 * @param raw raw object data
 * @return the copied characters followed by {@code R}
 */
static String getObjectName(final byte[] raw) {
    final StringBuilder name = new StringBuilder();
    char previous = ' ';
    int matchedChars = 0;

    for (final byte b : raw) {
        char c = (char) b;

        //treat returns same as spaces
        if (c == 10 || c == 13) {
            c = ' ';
        }

        if (c == ' ' && previous == ' ') { //lose duplicate or spaces
            matchedChars = 0;
        } else if (c == pattern.charAt(matchedChars)) { //looking for obj at end
            matchedChars++;
            if (matchedChars == 3) {
                break;
            }
        } else {
            matchedChars = 0;
            name.append(c);
        }
        previous = c;
    }

    //add end
    return name.append('R').toString();
}
/**
 * Reads a block of bytes from the file.
 *
 * @param start position to read from; nothing is read when it is negative
 * @param count size of the buffer to fill
 * @return a new buffer of {@code count} bytes; it is left as allocated when {@code start}
 *         is negative, and an {@code IOException} while reading is only logged
 */
byte[] getBytes(final long start, final int count) {
    final byte[] buffer = new byte[count];
    if (start < 0) {
        return buffer;
    }

    try {
        pdf_datafile.seek(start);
        pdf_datafile.read(buffer); //get next chars
    } catch (final IOException e) {
        LogWriter.writeLog("Exception: " + e.getMessage());
    }
    return buffer;
}
/**
 * Closes the file data and drops the reference to it; does nothing if that has already
 * happened.
 *
 * @throws IOException if closing fails (the reference is then kept)
 */
void closeFile() throws IOException {
    if (pdf_datafile == null) {
        return;
    }
    pdf_datafile.close();
    pdf_datafile = null;
}
/**
 * Scans forward from {@code pointer}, 50 bytes at a time, to see whether the reference data
 * there is a new 1.5 style stream or a traditional table.
 *
 * The scan stops at the first run of bytes {@code xre} (legacy), {@code ObjS} or
 * {@code XRe} (both compressed). It also stops when the end of the file is reached; the
 * original code kept looping at that point without advancing.
 *
 * @param pointer position to start from; a negative value is first advanced by the buffer size
 * @param eof     end-of-file position
 * @return {@code true} for a compressed stream, {@code false} for a legacy table
 * @throws PdfException if neither is found before the end of the file. The file is left
 *                      open because the callers in this class recover from the exception
 *                      by scanning the same file for objects.
 */
private boolean isCompressedStream(int pointer, final int eof) throws PdfException {
    int bufSize = 50, charReached_legacy = 0, charReached_comp1 = 0, charReached_comp2 = 0;
    final int[] objStm = {'O', 'b', 'j', 'S', 't', 'm'};
    final int[] XRef = {'X', 'R', 'e', 'f'};
    int type = UNSET;

    //flag to show if at start of data for check
    boolean firstRead = true;

    while (true) {
        //nothing left to look at - give up rather than loop with an empty buffer
        if (pointer >= eof) {
            type = UNSET;
            break;
        }

        /* adjust buffer if less than a full buffer is left in file */
        if (pointer + bufSize > eof) {
            bufSize = eof - pointer;
        }
        if (bufSize < 0) {
            bufSize = 50;
        }
        if (pointer < 0) {
            pointer += bufSize;
            continue;
        }

        final byte[] buffer = getBytes(pointer, bufSize);

        //allow for fact sometimes start of data wrong (pointer lands on the 'r' of xref)
        if (firstRead && bufSize > 2 && buffer[0] == 'r' && buffer[1] == 'e' && buffer[2] == 'f') {
            charReached_legacy = 1;
        }
        firstRead = false; //switch off

        /*look for xref or obj */
        for (int i = 0; i < bufSize; i++) {
            final byte currentByte = buffer[i];

            /* check for xref OR end - reset if not */
            if (currentByte == oldPattern[charReached_legacy] && type != COMPRESSED) {
                charReached_legacy++;
                type = LEGACY;
            } else if ((currentByte == objStm[charReached_comp1]) && (charReached_comp1 == 0 || type == COMPRESSED)) {
                charReached_comp1++;
                type = COMPRESSED;
            } else if ((currentByte == XRef[charReached_comp2]) && (charReached_comp2 == 0 || type == COMPRESSED)) {
                charReached_comp2++;
                type = COMPRESSED;
            } else {
                charReached_legacy = 0;
                charReached_comp1 = 0;
                charReached_comp2 = 0;
                type = UNSET;
            }

            if (charReached_legacy == 3 || charReached_comp1 == 4 || charReached_comp2 == 3) {
                break;
            }
        }

        if (charReached_legacy == 3 || charReached_comp1 == 4 || charReached_comp2 == 3) {
            break;
        }

        //update pointer
        pointer += bufSize;
    }

    /*
     * throw exception if no match or tell user which type
     */
    if (type == UNSET) {
        throw new PdfException("Exception unable to find ref or obj in trailer");
    }
    return type == COMPRESSED;
}
/**
 * @return the Info dictionary taken from the trailer, or {@code null} if none has been read
 */
public PdfObject getInfoObject() {
    return this.infoObject;
}
/**
 * @return the Encrypt dictionary taken from the trailer, or {@code null} if none has been read
 */
public PdfObject getEncryptionObject() {
    return this.encryptObj;
}
/**
 * @return the first file ID met while reading an encrypted file's trailer, or {@code null}
 *         if none has been stored (the internal array itself, not a copy)
 */
public byte[] getID() {
    return this.ID;
}
//////////////////////////////////////////////////////////////////////////
/**
 * Moves the file position to {@code pointer}. Nothing is returned.
 *
 * A position beyond the length of the file is not applied and only logged, as is any
 * exception raised while moving.
 *
 * @param pointer new position in the file
 */
public void movePointer(final long pointer) {
    try {
        //make sure inside file
        if (pointer <= pdf_datafile.length()) {
            pdf_datafile.seek(pointer);
        } else {
            LogWriter.writeLog("Attempting to access ref outside file");
        }
    } catch (final Exception e) {
        LogWriter.writeLog("Exception " + e + " moving pointer to " + pointer + " in file.");
    }
}
}