// org.jpedal.parser.CommandParser - part of the OpenViewerFX artifact (Maven / Gradle / Ivy).
// OpenViewerFX: Open Source (LGPL) JavaFX PDF Viewer for NetBeans plugin.
/*
* ===========================================
* Java Pdf Extraction Decoding Access Library
* ===========================================
*
* Project Info: http://www.idrsolutions.com
* Help section for developers at http://www.idrsolutions.com/support/
*
* (C) Copyright 1997-2017 IDRsolutions and Contributors.
*
* This file is part of JPedal/JPDF2HTML5
*
@LICENSE@
*
* ---------------
* CommandParser.java
* ---------------
*/
package org.jpedal.parser;
import java.util.ArrayList;
import org.jpedal.io.types.StreamReaderUtils;
import org.jpedal.utils.NumberUtils;
public class CommandParser {
/**
 * raw bytes of the stream being tokenised (never reassigned)
 */
private final byte[] characterStream;
/**
 * id of the command found by the last call to getCommandValues, or -1 if the last token was not a command
 */
private int commandID = -1;
//opening and closing delimiters are matched by index, so prefixes[i] pairs with suffixes[i]
private static final int[] prefixes = {60, 40}; //important that < comes before ( '<'=60 '('=40
private static final int[] suffixes = {62, 41}; //'>'=62 ')'=41
/**
 * lookup table used by parseInt: intValues[row][digit] is digit * 10^(5-row),
 * so row 0 holds the hundred-thousands and row 5 the units
 */
private static final int[][] intValues = {
{0, 100000, 200000, 300000, 400000, 500000, 600000, 700000, 800000, 900000},
{0, 10000, 20000, 30000, 40000, 50000, 60000, 70000, 80000, 90000},
{0, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000},
{0, 100, 200, 300, 400, 500, 600, 700, 800, 900},
{0, 10, 20, 30, 40, 50, 60, 70, 80, 90},
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}};
/**
 * maximum ops (size of the opStart/opEnd arrays; currentOp wraps back to 0 on reaching it)
 */
private static final int MAXOPS = 50;
/**
 * lookup table for operands on commands - opStart[i] and opEnd[i] are offsets into
 * characterStream delimiting operand i
 */
private int[] opStart = new int[MAXOPS];
private int[] opEnd = new int[MAXOPS];
/**
 * number of operands stored since the last reset()
 */
private int operandCount;
/**
 * current op (index in opStart/opEnd where the next operand will be written)
 */
private int currentOp;
/**
 * length of characterStream, set in the constructor
 */
int streamSize;
/**
 * Creates a parser over the supplied stream. The array is stored by reference
 * (not copied) and its length is cached in streamSize.
 *
 * @param characterStr bytes of the stream to tokenise
 */
public CommandParser(final byte[] characterStr) {
this.characterStream = characterStr;
streamSize = characterStr.length;
}
/**
 * Reads one token from the stream starting at {@code dataPointer}.
 * <p>
 * Leading spaces and {@code %} comments are skipped first. The token is then either
 * recognised as a command (its id becomes available from {@link #getCommandID()} and any
 * stored operands are reversed so the most recent one is at index 0) or stored as an
 * operand in the opStart/opEnd tables.
 *
 * @param dataPointer offset in the stream at which to start reading
 * @param tokenNumber only used in the debug output printed when PdfStreamDecoder.showCommands is set
 * @return the negated offset reached when a command was found; otherwise the (non-negative)
 * offset at which the next call should start, which is the stream length once the end is reached
 */
int getCommandValues(int dataPointer, final int tokenNumber) {
    final int count = prefixes.length;
    int start, end = 0;
    commandID = -1;
    final int sLen = characterStream.length;

    dataPointer = StreamReaderUtils.skipSpacesOrOtherCharacter(characterStream, dataPointer, 0);
    if (dataPointer == streamSize) { //allow for end of stream
        return dataPointer;
    }

    //lose any comments in stream which start %
    while (dataPointer < streamSize && characterStream[dataPointer] == 37) {
        dataPointer = StreamReaderUtils.skipComment(characterStream, dataPointer);
    }
    if (dataPointer >= streamSize) { //allow for end of stream
        return dataPointer;
    }

    int current = characterStream[dataPointer];

    // read in value (note several options)
    boolean matchFound = false;
    final int type = getType(current, dataPointer);

    if (type == 3) { //option - its an alphabetical (or ' or ") so may be command or operand values
        start = dataPointer;
        while (true) { //read next valid char
            dataPointer++;
            if (dataPointer >= sLen) { //trap for end of stream
                break;
            }
            current = characterStream[dataPointer];
            //stop at return, linefeed, space, ( / [ tab or <
            if (current == 13 || current == 10 || current == 32 || current == 40 || current == 47 || current == 91 || current == 9 || current == '<') {
                break;
            }
        }

        //for this token type end is the index of the last character (inclusive)
        end = dataPointer - 1;
        if (end >= sLen) {
            return end;
        }

        //move back if ends with / [ < or %
        final int endC = characterStream[end];
        if (endC == 47 || endC == 91 || endC == '<' || endC == '%') {
            end--;
        }

        //see if command
        commandID = -1;
        if (end - start < 3) { //no command over 3 chars long
            //turn key into ID: pack the token bytes into an int, last character in the lowest byte
            int key = 0, x = 0;
            for (int i2 = end; i2 > start - 1; i2--) {
                key += (characterStream[i2] << x);
                x += 8;
            }
            commandID = Cmd.getCommandID(key);
        }

        // if command execute otherwise add to stack
        if (commandID == -1) {
            opStart[currentOp] = start;
            opEnd[currentOp] = end;
            if (PdfStreamDecoder.showCommands) {
                System.out.println(PdfStreamDecoder.indent + generateOpAsString(currentOp, false) + " (value) " + tokenNumber);
            }
            currentOp++;
            if (currentOp == MAXOPS) {
                currentOp = 0;
            }
            operandCount++;
        } else {
            if (PdfStreamDecoder.showCommands) {
                System.out.println(PdfStreamDecoder.indent + Cmd.getCommandAsString(commandID) + " (Command) " + tokenNumber);
            }
            //reorder values so work
            if (operandCount > 0) {
                reverseOperands();
            }
            //use negative to flag values found
            return -dataPointer;
        }
    } else if (type != 4) {
        start = dataPointer;

        //option << values >>
        //option [value] and [value (may have spaces and brackets)]
        if (type == 1 || type == 2) {
            boolean inStream = false;
            matchFound = true;
            int last = 32; // ' '=32
            while (true) { //read rest of chars
                if (last == 92 && current == 92) { //allow for \\ \\=92
                    last = 120; //'x'=120 so an escaped backslash does not escape the next char
                } else {
                    last = current;
                }
                dataPointer++; //roll on counter
                if (dataPointer == sLen) { //allow for end of stream
                    break;
                }

                //read next valid char, converting CR to space
                current = characterStream[dataPointer];
                if (current == 13 || current == 10 || current == 9) {
                    current = 32;
                }

                //exit at end
                boolean isBreak = false;
                if (current == 62 && last == 62 && (type == 1)) { //'>'=62
                    //allow for >> and > (bounds check stops a read past the end when >> closes the stream)
                    if (dataPointer + 1 < sLen && characterStream[dataPointer + 1] == '>') {
                        dataPointer++; //roll on in case no gap (ie case 25436)
                    }
                    isBreak = true;
                }

                if (type == 2) {
                    //stream flags - track whether we are inside an unescaped ( ) string
                    if ((current == 40) && (last != 92)) { //'('=40 '\\'=92
                        inStream = true;
                    } else if ((current == 41) && (last != 92)) {
                        inStream = false;
                    }
                    //exit at end
                    if (!inStream && current == 93 && last != 92) { //']'=93
                        isBreak = true;
                    }
                }

                if (isBreak) {
                    break;
                }
            }
            end = dataPointer;
        }

        if (!matchFound) { //option 3 other braces
            int last = 32;
            for (int startChars = 0; startChars < count; startChars++) {
                if (current == prefixes[startChars]) {
                    matchFound = true;
                    start = dataPointer;
                    int numOfPrefixs = 0; //counts the brackets when inside a text stream
                    while (true) { //read rest of chars
                        if ((last == 92) && (current == 92)) { //allow for \\ '\\'=92
                            last = 120; //'x'=120
                        } else {
                            last = current;
                        }
                        dataPointer++; //roll on counter
                        if (dataPointer == sLen) {
                            break;
                        }
                        current = characterStream[dataPointer]; //read next valid char, converting CR to space
                        if (current == 13 || current == 10 || current == 9) {
                            current = 32;
                        }
                        if (current == prefixes[startChars] && last != 92) { // '\\'=92
                            numOfPrefixs++;
                        }
                        if ((current == suffixes[startChars]) && (last != 92)) { //exit at end '\\'=92
                            if (numOfPrefixs == 0) {
                                break;
                            } else {
                                numOfPrefixs--;
                            }
                        }
                    }
                    startChars = count; //exit loop after match
                }
            }
            end = dataPointer;
        }

        //option 2 -its a value followed by a deliminator (CR,space,/)
        if (!matchFound) {
            start = dataPointer;
            final int firstChar = characterStream[start];
            while (true) { //read next valid char
                dataPointer++;
                if (dataPointer == sLen) { //trap for end of stream
                    break;
                }
                current = characterStream[dataPointer];
                // '('=40 '/'=47 '['=91 ('<' only ends a token which started with '/')
                if (current == 13 || current == 10 || current == 32 || current == 40 || current == 47 || current == 91 || current == 9 || (firstChar == '/' && current == '<')) {
                    break;
                }
            }
            //here end is the index of the delimiter (or the stream length at end of stream)
            end = dataPointer;
        }

        //do not include a following / or [ in the operand
        if (end < characterStream.length) {
            final int next = characterStream[end];
            if (next == 47 || next == 91) {
                end--;
            }
        }

        opStart[currentOp] = start;
        opEnd[currentOp] = end;
        if (PdfStreamDecoder.showCommands) {
            System.out.println(PdfStreamDecoder.indent + generateOpAsString(currentOp, false) + "<<----");
        }
        currentOp++;
        if (currentOp == MAXOPS) {
            currentOp = 0;
        }
        operandCount++;
    }

    //increment pointer unless it sits on a character which starts the next token
    if (dataPointer < streamSize) {
        final int nextChar = characterStream[dataPointer];
        if (nextChar != 47 && nextChar != 40 && nextChar != 91 && nextChar != '<') {
            dataPointer++;
        }
    }
    return dataPointer;
}
/**
 * Rebuilds opStart/opEnd so that index 0 holds the most recently stored operand,
 * index 1 the one before it, and so on.
 * <p>
 * The tables are written as a ring (currentOp wraps to 0 at MAXOPS), so the walk
 * backwards from currentOp wraps from 0 to MAXOPS - 1. At most MAXOPS operands can be
 * recovered; older ones have already been overwritten.
 */
private void reverseOperands() {
    final int[] orderedOpStart = new int[MAXOPS];
    final int[] orderedOpEnd = new int[MAXOPS];

    final int available = Math.min(operandCount, MAXOPS);
    int source = currentOp;
    for (int target = 0; target < available; target++) {
        //step back one slot, wrapping round the ring
        source = (source == 0) ? MAXOPS - 1 : source - 1;
        orderedOpStart[target] = opStart[source];
        orderedOpEnd[target] = opEnd[source];
    }

    opStart = orderedOpStart;
    opEnd = orderedOpEnd;
}
/**
 * @return id of the command found by the last call to getCommandValues,
 * or -1 if that call did not find a command
 */
public int getCommandID() {
return commandID;
}
/**
 * Classifies the token starting at {@code dataPointer} from its first character.
 *
 * @param current the character at {@code dataPointer}
 * @param dataPointer offset of that character in the stream
 * @return 1 for a token starting {@code <<}, 2 for one starting {@code [},
 * 3 for a letter (a-z, A-Z) or the {@code '} and {@code "} characters,
 * 4 for a space and 0 for anything else
 */
private int getType(final int current, final int dataPointer) {
    //look for << (the bounds check covers a single < as the very last byte)
    if (current == 60 && dataPointer + 1 < characterStream.length && characterStream[dataPointer + 1] == 60) {
        return 1;
    }
    if (current == 32) {
        return 4;
    }
    if (current == 91) { //[
        return 2;
    }

    final boolean isLowerCase = current >= 97 && current <= 122; //a-z
    final boolean isUpperCase = current >= 65 && current <= 90; //A-Z
    final boolean isQuote = current == 39 || current == 34; //not forgetting the non-alphabetical commands ' and "
    if (isLowerCase || isUpperCase || isQuote) {
        return 3;
    }
    return 0;
}
/**
 * Converts an operand to a String.
 * <p>
 * Trailing spaces, returns and linefeeds are dropped, each run of them inside the value
 * is reduced to its first character, and returns/linefeeds are written as spaces.
 * Bytes are widened to chars with a plain cast.
 *
 * @param p is current op number (index into the operand tables)
 * @param loseSlashPrefix if true a leading / is not included in the result
 * @return the operand text, or an empty String if nothing is left after trimming
 */
public String generateOpAsString(final int p, final boolean loseSlashPrefix) {
    final byte[] dataStream = characterStream;

    int start = this.opStart[p];
    //remove / on keys
    if (loseSlashPrefix && start < dataStream.length && dataStream[start] == 47) {
        start++;
    }

    //getCommandValues stores the stream length as the end of an operand which runs to
    //the end of the stream, so clamp to the last valid index
    int end = Math.min(this.opEnd[p], dataStream.length - 1);

    //lose spaces or returns at end (never walking back past the start)
    while (end >= start && isSpaceOrReturn(dataStream[end])) {
        end--;
    }

    final StringBuilder text = new StringBuilder(Math.max(end - start + 1, 0));
    boolean previousWasSpace = false;
    for (int i = start; i <= end; i++) {
        final byte value = dataStream[i];
        final boolean isSpace = isSpaceOrReturn(value);
        //discount duplicate spaces
        if (!(isSpace && previousWasSpace)) {
            text.append((value == 10 || value == 13) ? ' ' : (char) value);
        }
        previousWasSpace = isSpace;
    }
    return text.toString();
}

/**
 * @return true for space (32), return (13) or linefeed (10)
 */
private static boolean isSpaceOrReturn(final int value) {
    return value == 32 || value == 13 || value == 10;
}
/**
 * Parses an operand as a float.
 * <p>
 * Values with at most 3 integer digits and at most 11 characters from the decimal point
 * onwards go through NumberUtils.convertFloatFromStream (limited to 6 of those
 * characters); anything else is converted with Float.parseFloat on the operand text.
 *
 * @param id index of the operand in the operand tables
 * @return the operand value
 */
public final float parseFloat(final int id) {
    final byte[] stream = characterStream;
    final int start = opStart[id];
    final int charCount = opEnd[id] - start;

    //find decimal point (stays at the end of the token if there is none)
    int floatptr = charCount;
    for (int j = charCount - 1; j > -1; j--) {
        if (stream[start + j] == 46) { //'.'=46
            floatptr = j;
            break;
        }
    }

    //allow for a sign - it only moves the first digit on by one.
    //(the '+' case used to shrink the digit count as well, so it was one short of the
    //real number of integer digits; the '-' case never did)
    int intStart = 0;
    boolean isMinus = false;
    if (stream[start] == 43) { //'+'=43
        intStart++;
    } else if (stream[start] == 45) { //'-'=45
        intStart++;
        isMinus = true;
    }

    //optimisations
    final int intNumbers = floatptr - intStart; //digits before the decimal point
    int decNumbers = charCount - floatptr; //characters from the decimal point to the end

    final float f;
    if (intNumbers > 3 || decNumbers > 11) { //non-optimised to cover others (tiny decimals on big scaling can add up to a big diff)
        isMinus = false; //the sign is part of the text parsed here
        f = Float.parseFloat(this.generateOpAsString(id, false));
    } else {
        if (decNumbers > 6) { //old code used this accuracy so kept to avoid lots of minor changes
            decNumbers = 6;
        }
        f = NumberUtils.convertFloatFromStream(stream, start + intStart, start + floatptr, intNumbers, decNumbers);
    }

    return isMinus ? -f : f;
}
/**
 * Returns the current operands as floats.
 * <p>
 * If operand 0 starts with [ the numbers inside that array are returned; otherwise each
 * of the operandCount operands is parsed with parseFloat, in table order.
 *
 * @return the float values
 */
public float[] getValuesAsFloat() {
    final boolean firstOpIsArray = this.characterStream[opStart[0]] == 91; // [0.0 0.0 0.0]
    if (firstOpIsArray) {
        return readFloatArray();
    }

    final float[] values = new float[operandCount];
    int index = 0;
    while (index < values.length) {
        values[index] = parseFloat(index);
        index++;
    }
    return values;
}
/**
 * Reads the numbers inside operand 0, which is expected to be an array such as
 * [0.0 0.0 0.0]. A number is any run of digits, '.' and '-'; everything else is
 * treated as a gap.
 * <p>
 * Scanning stops at the end of the operand, so trailing gaps before the closing
 * bracket cannot pull in values from later tokens or run off the end of the stream.
 *
 * @return the values found, in stream order (empty if there are none)
 */
private float[] readFloatArray() {
    final int start = opStart[0];
    final int end = Math.min(this.opEnd[0], characterStream.length);

    final ArrayList<Float> values = new ArrayList<Float>();
    int chars = start + 1; //skip the opening [
    while (chars < end) {
        //gap
        while (chars < end && !isNumberChar(characterStream[chars])) {
            chars++;
        }
        if (chars >= end) {
            break; //only a gap was left
        }
        final int startPtr = chars;

        //number
        while (chars < end && isNumberChar(characterStream[chars])) {
            chars++;
        }
        values.add(NumberUtils.parseFloat(startPtr, chars - startPtr, characterStream));
    }

    final float[] op = new float[values.size()];
    for (int i = 0; i < op.length; i++) {
        op[i] = values.get(i);
    }
    return op;
}

/**
 * @return true if the character can be part of a number (digit, '.' or '-')
 */
private static boolean isNumberChar(final int c) {
    return c == '.' || c == '-' || (c >= '0' && c <= '9');
}
/**
 * Returns every current operand as text, in table order, with any leading /
 * removed from each value.
 *
 * @return one String per operand
 */
public String[] getValuesAsString() {
    final int total = operandCount;
    final String[] values = new String[total];
    int index = 0;
    while (index < total) {
        values[index] = generateOpAsString(index, true);
        index++;
    }
    return values;
}
/**
 * Parses operand 0 as an int.
 * <p>
 * Values of up to 6 digits are built from the intValues lookup table; longer values are
 * converted with Integer.parseInt on the operand text. A leading + or - is allowed.
 *
 * @return the value of operand 0
 */
public final int parseInt() {
    final byte[] stream = characterStream;
    final int start = opStart[0];
    final int charCount = this.opEnd[0] - start;

    //allow for a sign - it only moves the first digit on by one.
    //(the '+' case used to shrink the digit count as well, so +12 was read as 1)
    int intStart = 0;
    boolean isMinus = false;
    if (stream[start] == 43) { //'+'=43
        intStart++;
    } else if (stream[start] == 45) { //'-'=45
        intStart++;
        isMinus = true;
    }

    //optimisations
    final int intNumbers = charCount - intStart; //number of digits
    int number = 0;
    if (intNumbers > 6) { //non-optimised to cover others
        isMinus = false; //the sign is part of the text parsed here
        number = Integer.parseInt(generateOpAsString(0, false));
    } else { //optimised lookup version
        //the last row of intValues holds the units, so n digits use the last n rows
        int pos = start + intStart;
        for (int row = intValues.length - intNumbers; row < intValues.length; row++) {
            final int digit = stream[pos] - 48;
            number += intValues[row][digit];
            pos++;
        }
    }

    return isMinus ? -number : number;
}
/**
 * Empties the operand list by setting the write position and the operand count
 * back to 0. The opStart/opEnd tables themselves are not cleared.
 */
public void reset() {
currentOp = 0;
operandCount = 0;
}
/**
 * @return number of operands stored since the last reset()
 */
public int getOperandCount() {
return operandCount;
}
/**
 * @return the stream passed to the constructor (the same array, not a copy)
 */
public byte[] getStream() {
return this.characterStream;
}
/**
 * @return index in the operand tables where the next operand will be written
 */
public int getcurrentOp() {
return currentOp;
}
}