Downloading a project uses a lot of server resources. Please understand that we have to cover our server costs. Thank you in advance. Project price: only $1.
You can buy this project and download/modify it as often as you want.
/*
* ===========================================
* Java Pdf Extraction Decoding Access Library
* ===========================================
*
* Project Info: http://www.idrsolutions.com
* Help section for developers at http://www.idrsolutions.com/support/
*
* (C) Copyright 1997-2016 IDRsolutions and Contributors.
*
* This file is part of JPedal/JPDF2HTML5
*
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* ---------------
* RefTable.java
* ---------------
*/
package org.jpedal.io.types;
import java.io.IOException;
import org.jpedal.exception.PdfException;
import org.jpedal.io.ObjectDecoder;
import org.jpedal.io.PdfFileReader;
import org.jpedal.io.RandomAccessBuffer;
import org.jpedal.objects.raw.CompressedObject;
import org.jpedal.objects.raw.PageObject;
import org.jpedal.objects.raw.PdfDictionary;
import org.jpedal.objects.raw.PdfObject;
import org.jpedal.utils.LogWriter;
import org.jpedal.utils.NumberUtils;
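/**
 * Locates and reads the cross-reference data of a PDF file: finds the
 * startxref pointer, detects whether the table is a legacy xref table or a
 * compressed (PDF 1.5 style) stream, and exposes the Info and Encrypt
 * objects and the file ID found in the trailer.
 */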
public class RefTable {
/**Encrypt dictionary taken from the trailer; returned by getEncryptionObject()*/
PdfObject encryptObj;
/**holds file ID (first entry of the trailer ID array, only stored when an Encrypt dictionary is present)*/
private byte[] ID;
/**pattern to look for in objects*/
static final String pattern= "obj";
/**info object (Info dictionary taken from the trailer)*/
private PdfObject infoObject;
/**table type not yet determined by isCompressedStream*/
static final int UNSET=-1;
/**table type: compressed reference data (ObjStm / XRef keyword seen)*/
static final int COMPRESSED=1;
/**table type: legacy table (xref keyword seen)*/
static final int LEGACY=2;
/**source of the PDF bytes; set to null by closeFile()*/
private RandomAccessBuffer pdf_datafile;
/**keyword bytes matched by isCompressedStream to recognise a legacy table*/
static final byte[] oldPattern = {'x','r','e','f'};
/**end-of-file position supplied to the constructor; scans run backwards from it and pointers are checked against it*/
private final long eof;
/**receives the xref positions (addXref) and the "table invalid" flag*/
final Offsets offset;
/**
 * Creates a reader bound to one PDF data source.
 *
 * @param pdf_datafile buffer the PDF bytes are read from
 * @param eof          end-of-file position used by the scans in this class
 * @param offset       store that receives the cross-reference positions
 */
public RefTable(final RandomAccessBuffer pdf_datafile, final long eof, final Offsets offset) {
    this.offset = offset;
    this.eof = eof;
    this.pdf_datafile = pdf_datafile;
}
/**
* read first start ref from last 1024 bytes
*/
private int readFirstStartRef() throws PdfException {
//reset flag
offset.setRefTableInvalid(false);
int pointer = -1;
int i = 1019;
final StringBuilder startRef = new StringBuilder(10);
/* move to end of file and read last 1024 bytes*/
final int block=1024;
byte[] lastBytes = new byte[block];
long end;
/*
* set endpoint, losing null chars and anything before EOF
*/
final int[] EndOfFileMarker={37,37,69,79};
int valReached=3;
boolean EOFFound=false;
try {
end=eof;
/*
* lose nulls and other trash from end of file
*/
final int bufSize=255;
while(true){
final byte[] buffer=getBytes(end - bufSize, bufSize);
int offset=0;
for(int ii=bufSize-1;ii>-1;ii--){
//see if we can decrement EOF tracker or restart check
if(!EOFFound) {
valReached = 3;
}
if(buffer[ii]==EndOfFileMarker[valReached]){
valReached--;
EOFFound=true;
}else {
EOFFound = false;
}
//move to next byte
offset--;
if(valReached<0) {
ii = -1;
}
}
//exit if found values on loop
if(valReached<0){
end -= offset;
break;
}else{
end -= bufSize;
}
//allow for no eof
if(end<0){
end=eof;
break;
}
}
//end=end+bufSize;
//allow for very small file
int count=(int)(end - block);
if(count<0){
count=0;
final int size=(int)eof;
lastBytes=new byte[size];
i=size+3; //force reset below
}
lastBytes=getBytes(count, lastBytes.length);
} catch (final Exception e) {
LogWriter.writeLog("Exception " + e + " reading last 1024 bytes");
throw new PdfException( e + " reading last 1024 bytes");
}
// for(int ii=0;iifileSize) {
i = fileSize - 5;
}
while (i >-1) {
//first check is because startref works as well a startxref !!
if (((lastBytes[i] == 116 && lastBytes[i + 1] == 120) || (lastBytes[i] == 114 && lastBytes[i + 1] == 116))
&& (lastBytes[i + 2] == 114)
&& (lastBytes[i + 3] == 101)
&& (lastBytes[i + 4] == 102)) {
break;
}
i--;
}
/*trap buggy files*/
if(i==-1){
try {
closeFile();
} catch (final IOException e1) {
LogWriter.writeLog("Exception " + e1 + " closing file");
}
throw new PdfException( "No Startxref found in last 1024 bytes ");
}
i += 5; //allow for word length
//move to start of value ignoring spaces or returns
while (i < 1024 && (lastBytes[i] == 10 || lastBytes[i] == 32 || lastBytes[i] == 13)) {
i++;
}
//move to start of value ignoring spaces or returns
while ((i < 1024)
&& (lastBytes[i] != 10)
&& (lastBytes[i] != 32)
&& (lastBytes[i] != 13)) {
startRef.append((char) lastBytes[i]);
i++;
}
/*convert xref to string to get pointer*/
if (startRef.length() > 0) {
pointer = Integer.parseInt(startRef.toString());
}
if (pointer == -1){
LogWriter.writeLog("No Startref found in last 1024 bytes ");
try {
closeFile();
} catch (final IOException e1) {
LogWriter.writeLog("Exception " + e1 + " closing file");
}
throw new PdfException( "No Startref found in last 1024 bytes ");
}
return pointer;
}
/**
 * Locates the start of the reference data, decides whether it is a new 1.5
 * type (compressed) or a traditional xref table, and reads it.
 * <p>
 * Without a linearized object the position comes from the startxref value
 * at the end of the file. With one, the buffered start of the file is
 * scanned for the first {@code xref} or {@code XRef} keyword. If the
 * position is 0 or not inside the file, the file is scanned manually for
 * objects instead and the table is flagged as invalid.
 *
 * @param linearObj      linearized object, or {@code null} to use the startxref value
 * @param currentPdfFile reader used to read objects and passed on to the table readers
 * @param objectReader   passed on to the compressed-stream reader
 * @return the object produced by whichever reader was selected
 * @throws PdfException if the reference data cannot be found or read
 */
public final PdfObject readReferenceTable(final PdfObject linearObj, final PdfFileReader currentPdfFile, final ObjectReader objectReader) throws PdfException {

    int pointer = -1;
    final int eof = (int) this.eof;
    boolean islinearizedCompressed = false;

    if (linearObj == null) {
        pointer = readFirstStartRef();
    } else { //find at start of Linearized

        final byte[] data = pdf_datafile.getPdfBuffer();
        final int count = data.length;

        int ptr = 5;

        //both keywords need 4 bytes, so stop while 4 bytes are still available
        for (int i = 0; i + 3 < count; i++) {

            //track start of this object (needed for compressed)
            if (i + 5 < count
                    && data[i] == 'e' && data[i + 1] == 'n' && data[i + 2] == 'd'
                    && data[i + 3] == 'o' && data[i + 4] == 'b' && data[i + 5] == 'j') {
                ptr = i + 6;
            }

            if (data[i] == 'x' && data[i + 1] == 'r' && data[i + 2] == 'e' && data[i + 3] == 'f') {
                pointer = i;
                break;
            } else if (data[i] == 'X' && data[i + 1] == 'R' && data[i + 2] == 'e' && data[i + 3] == 'f') {

                islinearizedCompressed = true;

                //the object starts after the previous endobj, ignoring spaces or returns
                pointer = ptr;
                while (pointer < count && (data[pointer] == 10 || data[pointer] == 13 || data[pointer] == 32)) {
                    pointer++;
                }
                break;
            }
        }
    }

    offset.addXref(pointer);

    PdfObject rootObj = null;

    if (pointer >= eof || pointer == 0) {

        LogWriter.writeLog("Pointer not if file - trying to manually find startref");

        offset.setRefTableInvalid(true);

        try {
            rootObj = new PageObject(BrokenRefTable.findOffsets(pdf_datafile, offset));
        } catch (Error err) {
            throw new PdfException(err.getMessage() + " attempting to manually scan file for objects");
        }

        currentPdfFile.readObject(rootObj);
        return rootObj;

    } else if (islinearizedCompressed || isCompressedStream(pointer, eof)) {
        return readCompressedStream(rootObj, pointer, currentPdfFile, objectReader, linearObj);
    } else {
        return readLegacyReferenceTable(rootObj, pointer, eof, currentPdfFile);
    }
}
/**
 * read reference table from file so we can locate
 * objects in pdf file and read the trailers
 *
 * Each pass of the outer loop reads one block through Trailer.readTrailer,
 * works out where the next table is, and hands the table entries to
 * offset.readXRefs. Root, Encrypt, ID and Info are only taken from the
 * first trailer that is processed (while rootObj is still null).
 *
 * NOTE(review): the text of this method looks damaged in this copy. The
 * loop header "while (i >" is incomplete, endTable, pdfObject, start and
 * buffer are used without a visible declaration or assignment, and the
 * condition containing "pointer=0" does not parse. The end of this method
 * and the beginning of the byte-reading helper that finishes with
 * "return buffer;" appear to be missing - restore them from the upstream
 * source before relying on this code.
 *
 * @param rootObj root object found so far, null until the first trailer has been processed
 * @param pointer file position handed to Trailer.readTrailer for the first table
 * @param eof end-of-file position handed to Trailer.readTrailer and offset.readXRefs
 * @param currentPdfFile not used in the part of the method that is visible here
 * @throws PdfException if a trailer cannot be read (the file is closed first)
 */
private PdfObject readLegacyReferenceTable(PdfObject rootObj,int pointer, final int eof, final PdfFileReader currentPdfFile) throws PdfException {
int endTable, current = 0; //current object number
byte[] Bytes ;
int bufSize = 1024;
/*read and decode 1 or more trailers*/
while (true) {
try {
//allow for pointer outside file
Bytes=Trailer.readTrailer(bufSize, pointer, eof,pdf_datafile);
} catch (final Exception e) {
//close the file before reporting the failure; a failure to close is only logged
try {
closeFile();
} catch (final IOException e1) {
LogWriter.writeLog("Exception " + e + " closing file "+e1);
}
throw new PdfException("Exception " + e + " reading trailer");
}
if (Bytes == null) //safety catch
{
break;
}
//get trailer
int i = 0;
final int maxLen=Bytes.length;
boolean trailerNotFound=true;
//for(int a=0;a<100;a++)
// System.out.println((char)Bytes[i+a]);
while (i >
// NOTE(review): the rest of the loop header above and the code that followed it are missing
int level=0;
//walk to the end of the dictionary: 60 is '<' and 62 is '>', so a pair of either changes the nesting level
while(true){
if(Bytes[i] == 60 && Bytes[i - 1] == 60){
level++;
i++;
}else if(Bytes[i] =='['){
//skip over an array so brackets inside it are not counted
i++;
while(Bytes[i]!=']'){
i++;
if(i==Bytes.length) {
break;
}
}
}else if(Bytes[i] ==62 && Bytes[i - 1] ==62){
level--;
i++;
}
if(level==0) {
break;
}
i++;
}
//handle optional XRefStm
final int XRefStm=pdfObject.getInt(PdfDictionary.XRefStm);
if(XRefStm!=-1){
pointer=XRefStm;
}else{ //usual way
boolean hasRef=true;
i = StreamReaderUtils.skipSpaces(Bytes, i);
//skip comment lines (start with '%', end with a line feed)
while (Bytes[i] =='%'){
while(Bytes[i]!=10){
i++;
}
i++;
}
/* fix for /Users/markee/Downloads/oneiderapartnerbrochure_web_1371798737.pdf
/**/
//look for xref as end of startref
// 116,120,114,101,102 are the bytes of "txref"
// NOTE(review): the tests are joined with &&, so the loop stops as soon as any one of the five bytes matches - confirm this is intended
while (Bytes[i] != 116 && Bytes[i + 1] != 120 &&
Bytes[i + 2] != 114 && Bytes[i + 3] != 101 && Bytes[i + 4] != 102){
//an object starting before any keyword means there is no startref value here
if(Bytes[i]=='o' && Bytes[i+1]=='b' && Bytes[i+2]=='j'){
hasRef=false;
break;
}
i++;
}
if(hasRef){
i += 8;
//move to start of value ignoring spaces or returns
while ((i < maxLen)&& (Bytes[i] == 10 || Bytes[i] == 32 || Bytes[i] == 13)) {
i++;
}
final int s=i;
//allow for characters between xref and startref
while (i < maxLen && Bytes[i] != 10 && Bytes[i] != 32 && Bytes[i] != 13) {
i++;
}
/*convert xref to string to get pointer*/
if (s!=i) {
pointer = NumberUtils.parseInt(s, i, Bytes);
}
}
}
//go back to the first non-space byte of the block
i = StreamReaderUtils.skipSpaces(Bytes, 0);
if (pointer == -1){
LogWriter.writeLog("No startRef");
/*now read the objects for the trailers*/
} else if (Bytes[i] == 120 && Bytes[i+1] == 114 && Bytes[i+2] == 101 && Bytes[i+3] == 102) { //make sure starts xref
i = StreamReaderUtils.skipSpaces(Bytes, 5);
current = offset.readXRefs(current, Bytes, endTable, i,eof,pdf_datafile);
/*now process trailer values - only first set of table values for root, encryption and info*/
if (rootObj==null) {
rootObj=pdfObject.getDictionary(PdfDictionary.Root);
encryptObj=pdfObject.getDictionary(PdfDictionary.Encrypt);
if(encryptObj!=null){
final byte[][] IDs=pdfObject.getStringArray(PdfDictionary.ID);
if(IDs!=null && this.ID==null) {
// only the first encountered ID should be used as a fileID for decryption
this.ID = IDs[0];
}
}
infoObject=pdfObject.getDictionary(PdfDictionary.Info);
}
//make sure first values used if several tables and code for prev
pointer=pdfObject.getInt(PdfDictionary.Prev);
//see if other trailers
// NOTE(review): the next line is where the text is damaged; what follows it appears to be the tail of a helper that reads bytes at 'start' into 'buffer'
if (pointer!=-1 && pointer=0){
try {
pdf_datafile.seek(start);
pdf_datafile.read(buffer); //get next chars
} catch (final IOException e) {
//a failed read is only logged; the buffer is still returned
LogWriter.writeLog("Exception: " + e.getMessage());
}
}
return buffer;
}
/**
 * Closes the underlying data file and drops the reference to it.
 * Does nothing when the file has already been released.
 *
 * @throws IOException if closing fails (the reference is kept in that case)
 */
void closeFile() throws IOException {
    if (pdf_datafile == null) {
        return;
    }
    pdf_datafile.close();
    pdf_datafile = null;
}
/**
 * Tests the bytes from {@code pointer} onwards to see whether they hold a
 * new 1.5 style table (an {@code ObjStm} or {@code XRef} keyword comes
 * first) or a traditional table (an {@code xref} keyword comes first).
 * <p>
 * The data is read in chunks of up to 50 bytes. The search ends at the
 * first keyword, or at {@code eof} when there is none.
 *
 * @param pointer position to start reading from
 * @param eof     position at which the search gives up
 * @return {@code true} for a compressed table, {@code false} for a legacy one
 * @throws PdfException if neither keyword is found before {@code eof}
 *         (the file is closed first)
 */
private boolean isCompressedStream(int pointer, final int eof) throws PdfException {

    int bufSize = 50, charReached_legacy = 0, charReached_comp1 = 0, charReached_comp2 = 0;

    final int[] objStm = {'O', 'b', 'j', 'S', 't', 'm'};
    final int[] XRef = {'X', 'R', 'e', 'f'};

    int type = UNSET;

    //flag to show if at start of data for check
    boolean firstRead = true;

    boolean found = false;

    while (!found) {

        /* shrink the chunk if fewer bytes than that are left before eof */
        if (pointer + bufSize > eof) {
            bufSize = eof - pointer;
        }

        if (bufSize < 0) {
            bufSize = 50;
        }

        if (pointer < 0) {
            pointer += bufSize;
            continue;
        }

        //nothing left to read: give up here, otherwise a zero-length chunk would be re-read forever
        if (pointer >= eof) {
            type = UNSET;
            break;
        }

        final byte[] buffer = getBytes(pointer, bufSize);

        //allow for fact sometimes start of data wrong (pointer lands on the 'r' of xref)
        if (firstRead && buffer.length > 2 && buffer[0] == 'r' && buffer[1] == 'e' && buffer[2] == 'f') {
            charReached_legacy = 1;
        }

        firstRead = false; //switch off

        /*look for xref or obj */
        for (int i = 0; i < bufSize && !found; i++) {

            final byte currentByte = buffer[i];

            /* advance whichever keyword this byte continues - reset all if none */
            if (currentByte == oldPattern[charReached_legacy] && type != COMPRESSED) {
                charReached_legacy++;
                type = LEGACY;
            } else if (currentByte == objStm[charReached_comp1] && (charReached_comp1 == 0 || type == COMPRESSED)) {
                charReached_comp1++;
                type = COMPRESSED;
            } else if (currentByte == XRef[charReached_comp2] && (charReached_comp2 == 0 || type == COMPRESSED)) {
                charReached_comp2++;
                type = COMPRESSED;
            } else {
                charReached_legacy = 0;
                charReached_comp1 = 0;
                charReached_comp2 = 0;
                type = UNSET;
            }

            //a prefix is enough to decide: "xre", "ObjS" or "XRe"
            found = charReached_legacy == 3 || charReached_comp1 == 4 || charReached_comp2 == 3;
        }

        //update pointer
        if (!found) {
            pointer += bufSize;
        }
    }

    /*
     * throw exception if no match or tell user which type
     */
    if (type == UNSET) {
        try {
            closeFile();
        } catch (final IOException e1) {
            LogWriter.writeLog("Exception " + e1 + " closing file");
        }
        throw new PdfException("Exception unable to find ref or obj in trailer");
    }

    return type == COMPRESSED;
}
/**
 * @return the Info dictionary object taken from the trailer, or
 *         {@code null} if none has been stored
 */
public PdfObject getInfoObject() {
    return this.infoObject;
}
/**
 * @return the Encrypt dictionary object taken from the trailer, or
 *         {@code null} if none has been stored
 */
public PdfObject getEncryptionObject() {
    return this.encryptObj;
}
/**
 * @return the stored file ID (the array itself, not a copy), or
 *         {@code null} if none has been stored
 */
public byte[] getID() {
    return this.ID;
}
//////////////////////////////////////////////////////////////////////////
/**
 * Positions the underlying data file at {@code pointer}.
 * <p>
 * A position beyond the end of the file is not applied; it is only
 * logged. Any exception raised while checking or seeking is logged and
 * not passed on to the caller.
 *
 * @param pointer position to move to
 */
public void movePointer(final long pointer) {
    try {
        //make sure inside file
        final boolean beyondEnd = pointer > pdf_datafile.length();
        if (!beyondEnd) {
            pdf_datafile.seek(pointer);
        } else {
            LogWriter.writeLog("Attempting to access ref outside file");
        }
    } catch (final Exception e) {
        LogWriter.writeLog("Exception " + e + " moving pointer to " + pointer + " in file.");
    }
}
}