org.jpedal.io.types.Dictionary Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of OpenViewerFX Show documentation
Show all versions of OpenViewerFX Show documentation
An Open Source JavaFX PDF Viewer
/*
* ===========================================
* Java Pdf Extraction Decoding Access Library
* ===========================================
*
* Project Info: http://www.idrsolutions.com
* Help section for developers at http://www.idrsolutions.com/support/
*
* (C) Copyright 1997-2017 IDRsolutions and Contributors.
*
* This file is part of JPedal/JPDF2HTML5
*
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* ---------------
* Dictionary.java
* ---------------
*/
package org.jpedal.io.types;
import java.util.ArrayList;
import org.jpedal.io.ObjectDecoder;
import static org.jpedal.io.ObjectDecoder.debugFastCode;
import static org.jpedal.io.ObjectDecoder.padding;
import static org.jpedal.io.ObjectDecoder.resolveFully;
import org.jpedal.io.ObjectUtils;
import org.jpedal.io.PdfFileReader;
import org.jpedal.objects.raw.FormObject;
import org.jpedal.objects.raw.ObjectFactory;
import org.jpedal.objects.raw.PdfDictionary;
import org.jpedal.objects.raw.PdfObject;
import org.jpedal.utils.NumberUtils;
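/**
 * Static helpers that read dictionary-type values out of a raw PDF byte
 * stream and store them on a {@link PdfObject}.
 */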
public class Dictionary {
/**
 * Reads the value held under a dictionary-type key and stores it on the
 * supplied object.
 * <p>
 * Depending on the first significant byte the value is handled as a name
 * (starts with '/'), as an empty object, or as a direct/indirect dictionary.
 *
 * @param pdfObject    object the value is stored on
 * @param i            position in {@code raw} to start reading from
 * @param raw          bytes being parsed
 * @param PDFkeyInt    key the value is stored under
 * @param objectReader reader used when a referenced object has to be loaded
 * @return position reached in {@code raw}
 */
public static int readDictionary(final PdfObject pdfObject, int i, final byte[] raw, final int PDFkeyInt, final PdfFileReader objectReader) {

    //when only the top level is wanted the whole tree is not read
    final boolean topLevelOnly = pdfObject.ignoreRecursion();

    //step over the current byte unless it is '<', then drop leading spaces
    int cursor = (raw[i] == '<') ? i : i + 1;
    cursor = StreamReaderUtils.skipSpaces(raw, cursor);

    //name value shared by some objects (ie /ToUnicode /Identity-H)
    if (raw[cursor] == '/') {
        return readKey(pdfObject, cursor + 1, raw, PDFkeyInt);
    }

    //empty object - nothing to store
    if (StreamReaderUtils.isEndObj(raw, cursor)) {
        if (debugFastCode) {
            System.out.println(padding + "Empty object" + new String(raw) + "<<");
        }
        return cursor;
    }

    //top level only: value is a ref (possibly wrapped as [ref])
    if (topLevelOnly) {
        return readRef(pdfObject, cursor, raw, PDFkeyInt, objectReader);
    }

    return Dictionary.readDictionaryFromRefOrDirect(pdfObject, pdfObject.getObjectRefAsString(), cursor, raw, PDFkeyInt, objectReader);
}
/**
 * Skips leading spaces and any '[' and then hands over to
 * {@link #readDictionaryFromRefOrDirect} to read the value.
 *
 * @param pdfObject    object the value is stored on
 * @param i            position in {@code raw} to start reading from
 * @param raw          bytes being parsed
 * @param PDFkeyInt    key the value is stored under
 * @param objectReader reader used when a referenced object has to be loaded
 * @return position reached in {@code raw}
 */
static int readRef(final PdfObject pdfObject, int i, final byte[] raw, final int PDFkeyInt, final PdfFileReader objectReader) {

    if (debugFastCode) {
        System.out.println(padding + "1.About to read ref orDirect i=" + i + " char=" + (char) raw[i]);
    }

    //91 is '['
    final int valueStart = StreamReaderUtils.skipSpacesOrOtherCharacter(raw, i, 91);

    return Dictionary.readDictionaryFromRefOrDirect(pdfObject, pdfObject.getObjectRefAsString(), valueStart, raw, PDFkeyInt, objectReader);
}
/**
 * Reads a name value (the bytes following a '/') and stores it on
 * {@code pdfObject} under {@code PDFkeyInt} as a new object carrying both
 * the constant and the general String form of the name.
 *
 * @param pdfObject object the value is stored on
 * @param i         position in {@code raw} of the first byte after the '/'
 * @param raw       bytes being parsed
 * @param PDFkeyInt key the value is stored under
 * @return position in {@code raw} returned by skipToEndOfRef
 */
static int readKey(final PdfObject pdfObject, int i, final byte[] raw, final int PDFkeyInt) {

    final int nameStart = i;
    final int nameEnd = StreamReaderUtils.skipToEndOfRef(raw, nameStart);
    final int nameLength = nameEnd - nameStart;

    //holder for the value, created with the parent's ref, type and id
    final PdfObject holder = ObjectFactory.createObject(PDFkeyInt, pdfObject.getObjectRefAsString(), pdfObject.getObjectType(), pdfObject.getID());
    holder.setID(PDFkeyInt);

    //keep the value both as a constant and as a String
    holder.setConstant(PDFkeyInt, nameStart, nameLength, raw);
    final byte[] nameBytes = getByteKeyFromStream(nameLength, raw, nameStart);
    holder.setGeneralStringValue(new String(nameBytes));

    if (debugFastCode) {
        System.out.println(padding + "Set Dictionary as String=" + holder.getGeneralStringValue() + " in " + pdfObject + " to " + holder);
    }

    //attach to parent
    pdfObject.setDictionary(PDFkeyInt, holder);

    return nameEnd;
}
/**
 * Reads a dictionary of key/value pairs, given either directly in
 * {@code raw} or as an indirect reference, and stores it on
 * {@code pdfObject} under {@code PDFkeyInt}.
 * <p>
 * If the value is a reference and recursion is allowed, the referenced
 * object is loaded through {@code objectReader} and the pairs are read from
 * that data instead. An empty dictionary (ie /Pattern &lt;&lt;&gt;&gt;) stores nothing.
 *
 * @param pdfObject    object the dictionary is stored on
 * @param i            position in {@code raw} to start reading from
 * @param raw          bytes being parsed
 * @param objectReader reader used to load a referenced object
 * @param PDFkeyInt    key the dictionary is stored under
 * @return for a direct value, the position reached after the pairs; for a
 *         reference, the position reported by readRefFromStream;
 *         {@code raw.length} if the referenced data could not be loaded
 */
public static int setDictionaryValue(final PdfObject pdfObject, int i, final byte[] raw, final PdfFileReader objectReader, final int PDFkeyInt) {
    //if we only need top level do not read whole tree
    final boolean ignoreRecursion=pdfObject.ignoreRecursion();
    if(debugFastCode) {
        System.out.println(padding + ">>>Reading Dictionary Pairs i=" + i + ' ' + (char) raw[i] + (char) raw[i + 1] + (char) raw[i + 2] + (char) raw[i + 3] + (char) raw[i + 4] + (char) raw[i + 5] + (char) raw[i + 6]);
    }
    //47 is '/'
    i = StreamReaderUtils.skipSpacesOrOtherCharacter(raw, i, 47);
    //set data which will be switched below if ref
    byte[] data=raw;
    int j=i;
    //anything not starting with '<' is treated as an indirect reference
    final boolean isRef=data[j]!='<';
    if(isRef){
        //values holds object number, generation and position after the ref
        final int[] values = StreamReaderUtils.readRefFromStream(raw, i);
        final int number = values[0];
        final int generation = values[1];
        i = values[2];
        if(!ignoreRecursion){
            //read the Dictionary data
            data=objectReader.readObjectAsByteArray(pdfObject, objectReader.isCompressed(number, generation), number, generation);
            //allow for data in Linear object not yet loaded
            if(data==null){
                pdfObject.setFullyResolved(false);
                if(debugFastCode) {
                    System.out.println(padding + "Data not yet loaded");
                }
                return raw.length;
            }
            if(data[0]=='<' && data[1]=='<'){
                j=0;
            }else{
                //lose obj at start: advance until the three bytes before j spell "obj".
                //The previous test (j-1!='j' && j-2!='b' && j-3!='o') ended the scan as
                //soon as ANY one of the three bytes matched, not when all of them did.
                j=3;
                while(!(data[j-3]=='o' && data[j-2]=='b' && data[j-1]=='j')){
                    if(data[j]=='/'){ //trap for odd case
                        j=0;
                        break;
                    }
                    j++;
                    if(j==data.length){ //some missing obj so catch these
                        j=0;
                        break;
                    }
                }
                j=StreamReaderUtils.skipSpaces(data, j);
                if(data[j]=='%'){
                    j=StreamReaderUtils.skipComment(data, j);
                }
            }
        }
    }
    //allow for empty object (ie /Pattern <<>> )
    final int endJ=StreamReaderUtils.skipSpacesOrOtherCharacter(data, j,'<');
    if(data[endJ]=='>'){ //empty object
        j=endJ+1;
    }else{
        final PdfObject valueObj= ObjectFactory.createObject(PDFkeyInt, pdfObject.getObjectRefAsString(), pdfObject.getObjectType(), pdfObject.getID());
        valueObj.setID(PDFkeyInt);
        //read pairs (stream in data starting at j)
        j=readKeyPairs(data, j,valueObj);
        //store value
        pdfObject.setDictionary(PDFkeyInt,valueObj);
        if(debugFastCode) {
            System.out.println(padding + "Set Dictionary pairs type in " + pdfObject + " to " + valueObj);
        }
    }
    //update pointer if direct so at end (if ref already in right place)
    if(!isRef){
        i=j;
        if(debugFastCode) {
            System.out.println(i + ">>>>" + data[i - 2] + ' ' + data[i - 1] + " >" + data[i] + "< " + data[i + 1] + ' ' + data[i + 2]);
        }
    }
    return i;
}
/**
* sets pairs and returns point reached in stream
*/
private static int readKeyPairs(final byte[] data, int start, final PdfObject pdfObject) {
final ArrayList keys=new ArrayList(100);
final ArrayList values=new ArrayList(100);
while(true){
//move cursor to start of text
start = StreamReaderUtils.skipSpacesOrOtherCharacter(data, start, 60);
if(data[start]==37){ //allow for comment
start = StreamReaderUtils.skipComment(data, start);
}
if(data[start]==62 || StreamReaderUtils.isEndObj(data,start)) { //exit at end
break;
}
//read key (starts with /)
final int tokenStart=start+1;
start=StreamReaderUtils.skipToEndOfKey(data, tokenStart);
keys.add(getByteKeyFromStream(start-tokenStart, data, tokenStart));
//read value
start=StreamReaderUtils.skipSpaces(data,start);
int refStart=start;
if(StreamReaderUtils.isNull(data,start)){
start += 4;
values.add(null);
}else {
if (data[start]==60 || data[start]=='[' || data[start]=='/') {
refStart = start;
if (data[start] == '<') {
start = ObjectUtils.skipToEndOfObject(start, data);
} else if (data[start] == '[') {
start=StreamReaderUtils.skipToEndOfArray(data, start);
} else if (data[start] == '/') {
start=StreamReaderUtils.skipToEndOfKey(data, start+1);
}
} else { //its 50 0 R
while (data[start] != 'R') {
start++;
}
start++; //roll past R
}
values.add(getByteKeyFromStream(start - refStart, data, refStart));
}
}
final int size=keys.size();
final byte[][] returnKeys=new byte[size][];
final byte[][] returnValues=new byte[size][];
for(int a=0;a0){
if(raw[i]=='<' && raw[i+1]=='<'){
i += 2;
reflevel++;
}else if(raw[i]=='(' ){ //allow for << (>>) >>
i++;
while(raw[i]!=')' || ObjectUtils.isEscaped(raw, i)) {
i++;
}
}else if(raw[i]=='>' && i+1==raw.length){
reflevel=0;
}else if(raw[i]=='>' && raw[i+1]=='>'){
i += 2;
reflevel--;
}else {
i++;
}
}
}else if(raw[i]=='['){
i++;
int reflevel=1;
while(reflevel>0){
if(raw[i]=='(' ){ //allow for [[ in stream ie [/Indexed /DeviceRGB 255 (abc[[z
i++;
while(raw[i]!=')' || ObjectUtils.isEscaped(raw, i)) {
i++;
}
}else if(raw[i]=='[' ){
reflevel++;
}else if(raw[i]==']'){
reflevel--;
}
i++;
}
i--;
}else if(StreamReaderUtils.isNull(raw,i)){ //allow for null
i += 4;
}else{ //must be a ref
//assume not object and reset below if wrong
status=PdfObject.UNDECODED_REF;
while(raw[i]!='R' || raw[i-1]=='e') { //second condition to stop spurious match on DeviceRGB
i++;
if(i==raw.length) {
break;
}
}
i++;
if(i>=raw.length) {
i = raw.length - 1;
}
}
}
valueObj.setStatus(status);
if(status!=PdfObject.DECODED){
final int StrLength=i-start;
final byte[] unresolvedData=new byte[StrLength];
System.arraycopy(raw, start, unresolvedData, 0, StrLength);
//check for returns in data if ends with R and correct to space
if(unresolvedData[StrLength-1]==82){
for(int jj=0;jj') //move back so loop works
{
i--;
}
return i;
}
/**
 * Reads a dictionary value that may be given directly (starting with '&lt;'),
 * as a name (starting with '/'), or as an indirect reference which may be
 * wrapped in an array ([ref]) and may itself point at another reference.
 *
 * @param pdfObject    object the value is stored on
 * @param objectRef    reference string passed on to the objects created for the value
 * @param i            position in {@code raw} to start reading from
 * @param raw          bytes being parsed
 * @param PDFkeyInt    - -1 will store in pdfObject directly, not as separate object
 * @param objectReader reader used to load referenced objects
 * @return position reached; {@code raw.length} if a referenced object could
 *         not be loaded yet; the position of ']' for an empty or all-null array
 */
public static int readDictionaryFromRefOrDirect(final PdfObject pdfObject, final String objectRef, int i, final byte[] raw, final int PDFkeyInt, final PdfFileReader objectReader) {
    //loop only repeats via the labelled continue below (null followed by a direct dictionary)
    readDictionaryFromRefOrDirect:
    while (true) {
        //91 is '['
        i=StreamReaderUtils.skipSpacesOrOtherCharacter(raw, i, 91);
        if(raw[i]=='%'){
            i=StreamReaderUtils.skipComment(raw, i);
            i=StreamReaderUtils.skipSpacesOrOtherCharacter(raw, i, 91);
        }
        if (raw[i] == 60) { //direct value starting with '<'
            //handlePairs returns a negated position when it consumed the value itself
            i = handlePairs(pdfObject, objectRef, i, raw, PDFkeyInt);
            if(i<0) {
                i=-i;
            }else{
                i = DirectDictionaryToObject.convert(pdfObject, objectRef, i, raw, PDFkeyInt,objectReader);
            }
        } else if (raw[i] == 47) { //direct value such as /DeviceGray
            i = ObjectUtils.setDirectValue(pdfObject, i, raw, PDFkeyInt);
        } else { // ref or [ref]
            int j = i, ref, generation;
            byte[] data = raw;
            //follow the chain of references until actual object data is reached
            while (true) {
                //allow for [ref] at top level (may be followed by gap
                j=StreamReaderUtils.skipSpacesOrOtherCharacter(data, j, 91);
                //trap empty arrays ie [ ]
                //ie 13jun/Factuur 2106010.PDF
                if (data[j] == ']') {
                    return j;
                }
                // trap nulls as well
                boolean hasNull = false;
                int keyStart;
                int[] values;
                while (true) {
                    //trap null arrays ie [null null]
                    if (hasNull && data[j] == ']') {
                        return j;
                    }
                    values = StreamReaderUtils.readRefFromStream(data, j);
                    ref = values[0];
                    keyStart = j;
                    j=StreamReaderUtils.skipToEndOfRef(data, j);
                    j=StreamReaderUtils.skipSpaces(data, j);
                    //handle nulls
                    //presumably 69560 is what readRefFromStream yields for the text "null" - confirm
                    if (ref != 69560 || data[keyStart] != 'n') {
                        break; //not null
                    } else {
                        hasNull = true;
                        if (data[j] == '<') { // /DecodeParms [ null << /K -1 /Columns 1778 >> ] ignore null and jump down to enclosed Dictionary
                            //NOTE(review): j indexes data, which may no longer be raw after an indirect hop - confirm
                            i = j;
                            continue readDictionaryFromRefOrDirect;
                        }
                    }
                }
                generation = values[1];
                j = values[2];
                data = objectReader.readObjectAsByteArray(pdfObject, objectReader.isCompressed(ref, generation), ref, generation);
                //allow for data in Linear object not yet loaded
                if (data == null) {
                    pdfObject.setFullyResolved(false);
                    return raw.length;
                }
                //disregard corrupted data from start of file
                if (data != null && data.length > 4 && data[0] == '%' && data[1] == 'P' && data[2] == 'D' && data[3] == 'F') {
                    data = null;
                }else if(StreamReaderUtils.isNull(data,0)){
                    data=null;
                }
                if (data == null) {
                    break;
                }
                /*
                 * get not indirect and exit if not
                 */
                int j2 = 0;
                //allow for [91 0 r]
                if (data[j2] != '[' && data[0] != '<' && data[1] != '<') {
                    //advance until the three bytes before j2 spell "obj". The previous test
                    //(j2-1!='j' && j2-2!='b' && j2-3!='o') ended the scan as soon as ANY one
                    //of the three bytes matched, not when all of them did.
                    while (j2 < 3 || !(data[j2 - 3] == 'o' && data[j2 - 2] == 'b' && data[j2 - 1] == 'j')) {
                        //allow for /None as value
                        if (data[j2] == '/') {
                            break;
                        }
                        j2++;
                        //no obj found: restart from the beginning instead of running off the array
                        //(same fallback as setDictionaryValue)
                        if (j2 == data.length) {
                            j2 = 0;
                            break;
                        }
                    }
                    j2=StreamReaderUtils.skipSpaces(data,j2);
                }
                //if indirect, round we go again
                if (data[j2] != 91) {
                    j = 0;
                    break;
                }else if(data[j2]=='[' && data[j2+1]=='<'){
                    j2++;
                    j=j2;
                    break;
                }
                j = j2;
            }
            //allow for no data found (ie /PDFdata/baseline_screens/debug/hp_broken_file.pdf)
            if (data != null) {
                i = handlePairs(pdfObject, objectRef, i, raw, PDFkeyInt);
                if(i<0) {
                    i=-i;
                }else{
                    i=readObj(j, data, raw, ref, generation, i, pdfObject, PDFkeyInt, objectReader);
                }
            }
        }
        return i;
    }
}
/**
 * Checks whether the value at {@code i} should be read as plain key/value
 * pairs and, if so, reads it into a new {@link FormObject} stored on
 * {@code pdfObject} under {@code PDFkeyInt}.
 * <p>
 * Only the keys N, R, D and Dests are considered, and never when the parent
 * is an AA object (D can occur in there as a Dictionary).
 *
 * @param pdfObject object the value is stored on
 * @param objectRef reference string given to the created FormObject
 * @param i         position in {@code raw} of the value
 * @param raw       bytes being parsed
 * @param PDFkeyInt key the value is stored under
 * @return {@code i} unchanged if nothing was read, otherwise the NEGATED
 *         position returned by readKeyPairs so callers can tell the cases apart
 */
static int handlePairs(final PdfObject pdfObject, final String objectRef, int i, final byte[] raw, final int PDFkeyInt) {

    //never treat as pairs inside AA
    if (pdfObject.getPDFkeyInt() == PdfDictionary.AA) {
        return i;
    }

    final boolean keyCanHoldPairs = PDFkeyInt == PdfDictionary.N
            || PDFkeyInt == PdfDictionary.R
            || PDFkeyInt == PdfDictionary.D
            || PDFkeyInt == PdfDictionary.Dests;

    if (!keyCanHoldPairs || !isDictionaryPairs(i, raw)) {
        return i;
    }

    final FormObject pairsHolder = new FormObject(objectRef);
    pdfObject.setDictionary(PDFkeyInt, pairsHolder);

    //negative result flags that the value has been consumed here
    return -readKeyPairs(raw, i, pairsHolder);
}
private static int readObj(int j, final byte[] data, final byte[] raw, int ref, int generation, int i, final PdfObject pdfObject, final int PDFkeyInt, final PdfFileReader objectReader) {
/*
* get id from stream
*/
j=StreamReaderUtils.skipSpaces(data, j);
//check not <0){
if(raw[end]=='<'&& raw[end+1]=='<'){
level2++;
end += 2;
}else if(raw[end-1]=='>'&& raw[end]=='>'){
level2--;
if(level2>0) {
end += 2;
}
}else if(raw[end]=='('){ //scan (strings) as can contain >>
end++;
while(raw[end]!=')' || ObjectUtils.isEscaped(raw, end)) {
end++;
}
}else {
end++;
}
}
inDictionary=false;
}else if(raw[end]=='R' ){
inDictionary=false;
}else if(isKey && (raw[end]==' ' || raw[end]==13 || raw[end]==10 || raw[end]==9)){
inDictionary=false;
}else if(raw[end]=='/'){
inDictionary=false;
end--;
}else if(raw[end]=='>' && raw[end+1]=='>'){
inDictionary=false;
end--;
}else {
end++;
}
}
//boolean save=debugFastCode;
Dictionary.readDictionary(pdfObject,i, raw, PDFkeyInt, objectReader);
//use correct value
return end;
}
static int handleValue(final PdfObject pdfObject, int i, final int PDFkeyInt, int j, final int ref, final int generation, final byte[] data, final PdfFileReader objectReader) {
final int keyStart;
int keyLength;
final int dataLen=data.length;
if (data[j] == 47) {
j++; //roll on past /
keyStart = j;
keyLength = 0;
//move cursor to end of text
while (j' && raw[j+1]=='>'){
level--;
if(level<0){
break;
}
}else if(level==0 && (raw[j]=='[' || (raw[j]=='/' && raw[j+1]=='T' && raw[j+2]=='y' && raw[j+3]=='p')
|| (raw[j]=='/' && raw[j+1]=='R' && raw[j+2]=='e' && raw[j+3]=='s' && raw[j+4]=='o' && raw[j+5]=='u')
|| (raw[j]=='s' && raw[j+1]=='t' && raw[j+2]=='r' && raw[j+3]=='e' && raw[j+4]=='a' && raw[j+5]=='m') || (raw[j]=='(' && raw[j+1]== ')'))){
j=length;
isPair=false;
}
}
return isPair;
}
}