
// com.adobe.fontengine.font.postscript.Tokenizer Maven / Gradle / Ivy
/*
*
* File: Tokenizer.java
*
* ****************************************************************************
*
* ADOBE CONFIDENTIAL
* ___________________
*
* Copyright 2004-2005 Adobe Systems Incorporated
* All Rights Reserved.
*
* NOTICE: All information contained herein is, and remains the property of
* Adobe Systems Incorporated and its suppliers, if any. The intellectual
* and technical concepts contained herein are proprietary to Adobe Systems
* Incorporated and its suppliers and may be covered by U.S. and Foreign
* Patents, patents in process, and are protected by trade secret or
* copyright law. Dissemination of this information or reproduction of this
* material is strictly forbidden unless prior written permission is obtained
* from Adobe Systems Incorporated.
*
*/
package com.adobe.fontengine.font.postscript;
import java.io.IOException;
import com.adobe.fontengine.font.InvalidFontException;
import com.adobe.fontengine.font.FontInputStream;
/**
* A postscript tokenizer.
*
* Synchronization
*
* This class is NOT threadsafe. Multiple instances can safely
* coexist without threadsafety issues, but each must only be accessed
* from one thread (or must be guarded by the client).
*/
final public class Tokenizer {
/* Stream the bytes come from; it is handed to the current reader on every read/unread call. */
private FontInputStream fontInputStream;
/* Single Token instance, created in the constructor and returned by getNextPSToken(). */
private Token token;
/* Current byte source; starts as a PlainReader and can be replaced through setReader(). */
private Reader reader;
/*
 * Selected PostScript lexical classes.
 * Each class is a distinct bit so that a single character can belong to
 * several classes at once (for example W|N for line terminators).
 */
private static final int N= 1; /* Newline (\n \r) */
private static final int W = 2; /* Whitespace (\0 \t \n \f \r space) */
private static final int S = 4; /* Special (delimiter: ( ) < > [ ] { } / %) */
private static final int D = 8; /* Decimal digit (0-9)*/
private static final int P = 16; /* Decimal point (period) */
private static final int G = 32; /* Sign (+ -) */
private static final int E = 64; /* Exponent (E e) */
/*
 * Index by ascii character and return lexical class(es).
 * The table has exactly 256 entries (one per unsigned byte value); each entry
 * is an OR of the class bits N, W, S, D, P, G, E, or 0 for "regular" characters.
 */
private static final int lexicalClass[] =
{
W, 0, 0, 0, 0, 0, 0, 0, /* 00-07 */
0, W, W|N, 0, W, W|N, 0, 0, /* 08-0f */
0, 0, 0, 0, 0, 0, 0, 0, /* 10-17 */
0, 0, 0, 0, 0, 0, 0, 0, /* 18-1f */
W, 0, 0, 0, 0, S, 0, 0, /* 20-27 */
S, S, 0, G, 0, G, P, S, /* 28-2f */
D, D, D, D, D, D, D, D, /* 30-37 */
D, D, 0, 0, S, 0, S, 0, /* 38-3f */
0, 0, 0, 0, 0, E, 0, 0, /* 40-47 */
0, 0, 0, 0, 0, 0, 0, 0, /* 48-4f */
0, 0, 0, 0, 0, 0, 0, 0, /* 50-57 */
0, 0, 0, S, 0, S, 0, 0, /* 58-5f */
0, 0, 0, 0, 0, E, 0, 0, /* 60-67 */
0, 0, 0, 0, 0, 0, 0, 0, /* 68-6f */
0, 0, 0, 0, 0, 0, 0, 0, /* 70-77 */
0, 0, 0, S, 0, S, 0, 0, /* 78-7f */
0, 0, 0, 0, 0, 0, 0, 0, /* 80-87 */
0, 0, 0, 0, 0, 0, 0, 0, /* 88-8f */
0, 0, 0, 0, 0, 0, 0, 0, /* 90-97 */
0, 0, 0, 0, 0, 0, 0, 0, /* 98-9f */
0, 0, 0, 0, 0, 0, 0, 0, /* a0-a7 */
0, 0, 0, 0, 0, 0, 0, 0, /* a8-af */
0, 0, 0, 0, 0, 0, 0, 0, /* b0-b7 */
0, 0, 0, 0, 0, 0, 0, 0, /* b8-bf */
0, 0, 0, 0, 0, 0, 0, 0, /* c0-c7 */
0, 0, 0, 0, 0, 0, 0, 0, /* c8-cf */
0, 0, 0, 0, 0, 0, 0, 0, /* d0-d7 */
0, 0, 0, 0, 0, 0, 0, 0, /* d8-df */
0, 0, 0, 0, 0, 0, 0, 0, /* e0-e7 */
0, 0, 0, 0, 0, 0, 0, 0, /* e8-ef */
0, 0, 0, 0, 0, 0, 0, 0, /* f0-f7 */
0, 0, 0, 0, 0, 0, 0, 0, /* f8-ff */
};
/*
 * Index by ascii char and return digit value (to radix 36) or error (99).
 * '0'-'9' map to 0-9; 'A'-'Z' and 'a'-'z' both map to 10-35; every other
 * entry is 99, which is larger than any supported radix. The table has
 * exactly 256 entries (one per unsigned byte value).
 */
static final byte digit[] =
{
99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,/* 00-0f */
99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,/* 10-1f */
99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,/* 20-2f */
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 99, 99, 99, 99, 99, 99,/* 30-3f */
99, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,/* 40-4f */
25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 99, 99, 99, 99, 99,/* 50-5f */
99, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,/* 60-6f */
25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 99, 99, 99, 99, 99,/* 70-7f */
99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,/* 80-8f */
99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,/* 90-9f */
99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,/* a0-af */
99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,/* b0-bf */
99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,/* c0-cf */
99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,/* d0-df */
99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,/* e0-ef */
99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,/* f0-ff */
};
/*
 * Character classification helpers.
 *
 * Every helper indexes a 256-entry table directly with c, so c must be in
 * the range 0-255; any other value (including -1) raises
 * ArrayIndexOutOfBoundsException.
 */

/** Tells whether c is PostScript whitespace (NUL, tab, LF, FF, CR or space). */
public static final boolean isWhite(int c)
{
  return 0 != (lexicalClass[c] & W);
}

/** Tells whether c is a line terminator (LF or CR). */
public static final boolean isNewLine(int c)
{
  return 0 != (lexicalClass[c] & N);
}

/** Tells whether c ends a token, i.e. is either a special character or whitespace. */
public static final boolean isDelimiter(int c)
{
  final int delimiterMask = S | W;
  return 0 != (lexicalClass[c] & delimiterMask);
}

/** Tells whether c is a sign character ('+' or '-'). */
public static final boolean isSign(int c)
{
  return 0 != (lexicalClass[c] & G);
}

/** Tells whether c is an exponent marker ('E' or 'e'). */
public static final boolean isExponent(int c)
{
  return 0 != (lexicalClass[c] & E);
}

/** Tells whether c is a decimal digit ('0'-'9'). */
public static final boolean isDigit(int c)
{
  return digit[c] < 10;
}

/** Tells whether c is a hexadecimal digit ('0'-'9', 'A'-'F' or 'a'-'f'). */
public static final boolean isHex(int c)
{
  return digit[c] < 16;
}

/** Tells whether c is a valid digit in radix b (its digit value is below b). */
public static final boolean isRadix(int c, int b)
{
  return digit[c] < b;
}
/**
 * Returns the radix-36 digit value of a character code.
 *
 * The argument is a Java byte, which is signed: character codes 0x80-0xFF
 * arrive as negative values. It is therefore masked to its unsigned value
 * before being used as a table index; without the mask those codes would
 * index before the start of the table and throw
 * ArrayIndexOutOfBoundsException.
 *
 * @param c the character code, interpreted as an unsigned byte
 * @return 0-35 when c is '0'-'9', 'A'-'Z' or 'a'-'z'; 99 for any other code
 */
public static final byte digitValue(byte c)
{
  return digit[c & 0xff];
}
/**
 * Creates a tokenizer that reads from the given stream.
 *
 * The tokenizer starts out with a PlainReader as its byte source and with
 * a fresh Token instance.
 *
 * @param is the stream to tokenize
 */
public Tokenizer(FontInputStream is)
{
  this.fontInputStream = is;
  this.token = new Token();
  this.reader = new PlainReader();
}
/**
 * Appends one byte to the current token, enlarging the token buffer by
 * 256 bytes beyond the current token length whenever it is full.
 *
 * @param newByte the byte to append
 */
private void addByte(byte newByte)
{
  if (token.tokenLength >= token.buff.length)
  {
    // buffer is full: move the bytes collected so far into a larger array
    byte[] grown = new byte[token.tokenLength + 256];
    System.arraycopy(token.buff, 0, grown, 0, token.tokenLength);
    token.buff = grown;
  }
  token.buff[token.tokenLength++] = newByte;
}
/**
 * Consumes the rest of a comment, up to but not including the line
 * terminator that ends it. The terminator is pushed back so that the
 * caller sees it as the next byte. Nothing is added to the current token.
 */
private void skipComment()
throws IOException, InvalidFontException
{
  int current;
  do
  {
    current = reader.read(fontInputStream);
  }
  while (!isNewLine(current));
  // leave the line terminator for the caller
  reader.unreadLast(fontInputStream);
}
/**
 * Consumes the remainder of a parenthesised string whose opening '(' has
 * already been read, appending every byte (including the closing ')') to
 * the current token.
 *
 * Nested, unescaped parentheses are balanced; a backslash causes the byte
 * that follows it to be copied without being interpreted.
 *
 * @throws InvalidFontException if the input ends before the string is closed
 */
private void skipString()
throws IOException, InvalidFontException
{
  int depth = 1; /* Already seen '(' */
  do
  {
    int c = reader.read(fontInputStream);
    if (c == -1)
    {
      // Without this check the loop would append (byte)-1 forever.
      throw new InvalidFontException("unexpected end of input in string");
    }
    addByte((byte)c);
    switch (c)
    {
    case '\\':
      /* Copy the escaped character verbatim so it cannot affect the nesting depth */
      c = reader.read(fontInputStream);
      if (c == -1)
      {
        throw new InvalidFontException("unexpected end of input in string");
      }
      addByte((byte)c);
      break;
    case '(':
      depth++;
      break;
    case ')':
      depth--;
      break;
    default:
      break;
    }
  }
  while (depth > 0);
}
/**
 * Consumes the remainder of an array whose opening '[' has already been
 * read, appending its bytes (including the closing ']') to the current
 * token.
 *
 * Nested brackets are balanced. Comments are dropped from the token and
 * parenthesised strings are copied as a unit, so brackets inside either
 * do not affect the nesting depth.
 *
 * @throws InvalidFontException if the input ends before the array is closed
 */
private void skipArray()
throws IOException, InvalidFontException
{
  int depth = 1; /* Already seen '[' */
  do
  {
    int c = reader.read(fontInputStream);
    switch (c)
    {
    case -1:
      // Without this check the loop would append (byte)-1 forever.
      throw new InvalidFontException("unexpected end of input in array");
    case '%':
      skipComment();
      break;
    case '(':
      addByte((byte)c);
      skipString();
      break;
    case '[':
      addByte((byte)c);
      depth++;
      break;
    case ']':
      addByte((byte)c);
      depth--;
      break;
    default:
      addByte((byte)c);
      break;
    }
  }
  while (depth > 0);
}
/**
 * Consumes the remainder of a procedure whose opening '{' has already
 * been read, appending its bytes (including the closing '}') to the
 * current token.
 *
 * Nested braces are balanced. Comments are dropped from the token and
 * parenthesised strings are copied as a unit, so braces inside either do
 * not affect the nesting depth.
 *
 * @throws InvalidFontException if the input ends before the procedure is closed
 */
private void skipProcedure()
throws IOException, InvalidFontException
{
  int depth = 1; /* Already seen '{' */
  do
  {
    int c = reader.read(fontInputStream);
    switch (c)
    {
    case -1:
      // Without this check the loop would append (byte)-1 forever.
      throw new InvalidFontException("unexpected end of input in procedure");
    case '%':
      skipComment();
      break;
    case '(':
      addByte((byte)c);
      skipString();
      break;
    case '{':
      addByte((byte)c);
      depth++;
      break;
    case '}':
      addByte((byte)c);
      depth--;
      break;
    default:
      addByte((byte)c);
      break;
    }
  }
  while (depth > 0);
}
/**
 * Consumes the remainder of a dictionary whose opening "<<" has already
 * been read, appending its bytes (including the closing ">>") to the
 * current token.
 *
 * Comments are dropped from the token; parenthesised strings and anything
 * introduced by '<' (nested dictionaries, hex and ASCII85 strings) are
 * delegated to skipString() and skipAngle() respectively.
 *
 * @throws InvalidFontException if the input ends before the dictionary is closed
 */
private void skipDictionary()
throws IOException, InvalidFontException
{
  for (;;)
  {
    int c = reader.read(fontInputStream);
    switch (c)
    {
    case -1:
      // Without this check the loop would append (byte)-1 forever.
      throw new InvalidFontException("unexpected end of input in dictionary");
    case '>':
      addByte((byte)c);
      c = reader.read(fontInputStream);
      if (c == '>')
      {
        addByte((byte)c);
        return;
      }
      if (c == -1)
      {
        throw new InvalidFontException("unexpected end of input in dictionary");
      }
      /* A lone '>': let the main loop handle whatever followed it */
      reader.unreadLast(fontInputStream);
      break;
    case '%':
      skipComment();
      break;
    case '(':
      addByte((byte)c);
      skipString();
      break;
    case '<':
      addByte((byte)c);
      skipAngle();
      break;
    default:
      addByte((byte)c);
      break;
    }
  }
}
/**
 * Consumes the construct introduced by a '<' that has already been read,
 * appending its bytes to the current token, and reports what it was.
 *
 * The byte after the '<' selects the construct:
 *   '<'  - a dictionary, consumed through skipDictionary();
 *   '~'  - an ASCII85 string, consumed up to and including "~>";
 *   else - a hexadecimal string, consumed up to and including '>'.
 *
 * An empty hexadecimal string ("<>") is accepted.
 *
 * NOTE(review): end of input is not checked in this method; if the reader
 * reports it as -1, the character-class table lookups reject that index.
 *
 * @return TokenType.kDICTIONARY, TokenType.kASCII85 or TokenType.kHEXSTRING
 * @throws InvalidFontException if an ASCII85 or hexadecimal string contains
 *         a character that is not allowed in it
 */
private TokenType skipAngle()
throws IOException, InvalidFontException
{
  int c = reader.read(fontInputStream);
  switch (c)
  {
  case '<':
    addByte((byte)c);
    skipDictionary();
    return TokenType.kDICTIONARY;
  case '~':
    addByte((byte)c);
    /* ASCII 85 string */
    for (;;)
    {
      c = reader.read(fontInputStream);
      addByte((byte)c);
      if (c == '~')
      {
        c = reader.read(fontInputStream);
        addByte((byte)c);
        switch (c)
        {
        case '>':
          return TokenType.kASCII85;
        }
      }
      else if ((c < '!' || c > 'u') && !isWhite(c) && c != 'z')
      {
        throw new InvalidFontException("invalid ascii85 string");
      }
    }
  default:
    {
      addByte((byte)c);
      /*
       * Skip hexadecimal string. The terminator is tested before the
       * character is validated, so that a '>' directly after the '<'
       * (the empty string) ends the token instead of being rejected as
       * a non-hex character.
       */
      while (c != '>')
      {
        if (!isHex(c) && !isWhite(c))
        {
          throw new InvalidFontException("invalid hex string");
        }
        c = reader.read(fontInputStream);
        addByte((byte)c);
      }
      return TokenType.kHEXSTRING;
    }
  }
}
/**
 * Consumes the rest of a token that starts like a number and classifies it.
 *
 * The token is run through a small state machine (states 1-11, described
 * by the comments on the finish-state switch below). When a delimiter is
 * reached the state decides between kINTEGER, kREAL and kOPERATOR, and the
 * delimiter is pushed back. As soon as a character does not fit any
 * numeric form, the rest of the token is consumed with skipToDelimiter()
 * and the token is reported as kOPERATOR.
 *
 * The first character is only inspected here, not appended; every further
 * non-delimiter character read is appended to the current token.
 *
 * NOTE(review): digits followed directly by an exponent with no decimal
 * point (e.g. "1E6") leave state 1 through the "not a digit" branch and
 * are therefore reported as kOPERATOR - confirm this is intended.
 * NOTE(review): end of input is not checked; isDelimiter() indexes a
 * 256-entry table, so a -1 from the reader is rejected by that lookup.
 *
 * @param c the first character of the token (already read by the caller)
 * @return TokenType.kINTEGER, TokenType.kREAL or TokenType.kOPERATOR
 */
private TokenType skipNumber(int c)
throws IOException, InvalidFontException
{
int state;
boolean operatorFound = false;
/* Determine initial state */
if (isDigit(c))
state = 1;
else if (isSign(c))
state = 2;
else if (c == '.')
state = 3;
else
{
state = 0;
operatorFound = true;
}
while (!operatorFound)
{
c = reader.read(fontInputStream);
/* The unbraced 'if' guards the whole finish-state switch that follows */
if (isDelimiter(c))
/* Determine token type by examining finish state */
switch (state)
{
case 2: /* [+-] */
case 3: /* [+-]?\. */
case 5: /* d# */
case 8: /* (d.|.d|d.d)[Ee] */
case 11: /* (d.|.d|d.d)[Ee][+-] */
/* Nearly, but not quite, a number */
reader.unreadLast(fontInputStream);
return TokenType.kOPERATOR;
case 1: /* d */
case 6: /* [+-]d */
case 9: /* d#d */
reader.unreadLast(fontInputStream);
return TokenType.kINTEGER;
case 4: /* d. */
case 7: /* .d d.d [+-](.d|d.|d.d) */
case 10: /* [+-]{0,1}(.d|d.|d.d)[Ee][+-]?d */
reader.unreadLast(fontInputStream);
return TokenType.kREAL;
}
/* Not a delimiter: the character belongs to the token */
addByte((byte)c);
/* Determine next state */
switch (state)
{
case 1:
if (c == '.')
state = 4;
else if (c == '#')
state = 5;
else if (!isDigit(c))
operatorFound = true;
break;
case 2:
if (isDigit(c))
state = 6;
else if (c == '.')
state = 3;
else
operatorFound = true;
break;
case 3:
if (isDigit(c))
state = 7;
else
operatorFound = true;
break;
case 4:
if (isDigit(c))
state = 7;
else if (isExponent(c))
state = 8;
else
operatorFound = true;
break;
case 5:
// this just makes sure it could be a valid radix...not that it is.
if (isRadix(c, 36))
state = 9;
else
operatorFound = true;
break;
case 6:
if (c == '.')
state = 7;
else if (!isDigit(c))
operatorFound = true;
break;
case 7:
if (isExponent(c))
state = 8;
else if (!isDigit(c))
operatorFound = true;
break;
case 8:
if (isDigit(c))
state = 10;
else if (isSign(c))
state = 11;
else
operatorFound = true;
break;
case 9:
if (!isRadix(c, 36))
operatorFound = true;
break;
case 10:
if (!isDigit(c))
operatorFound = true;
break;
case 11:
if (isDigit(c))
state = 10;
else
operatorFound = true;
break;
}
}
/* Non-numeric character encountered, skip to delimiter */
skipToDelimiter();
return TokenType.kOPERATOR;
}
/**
 * Appends bytes to the current token until a delimiter or the end of the
 * input (-1) is reached. A delimiter is pushed back so that it starts the
 * next token; nothing is pushed back at the end of the input.
 */
private void skipToDelimiter()
throws IOException, InvalidFontException
{
  int current = reader.read(fontInputStream);
  while (current != -1 && !isDelimiter(current))
  {
    addByte((byte)current);
    current = reader.read(fontInputStream);
  }
  if (current == -1)
  {
    return;
  }
  reader.unreadLast(fontInputStream);
}
/**
 * Discards input up to and including the next line terminator character,
 * or up to the end of the input (-1) if there is none. Nothing is added
 * to the current token.
 */
private void gotoNextLine()
throws InvalidFontException,
IOException,
IndexOutOfBoundsException
{
  int current;
  do
  {
    current = reader.read(fontInputStream);
  }
  while (current != -1 && !isNewLine(current));
}
/**
 * Reads the next PostScript token from the input.
 *
 * Leading whitespace and comments are skipped. The first significant byte
 * selects the kind of token: names ('/' and '//'), procedures, strings,
 * arrays, '<'-introduced constructs, numbers, and everything else as an
 * operator. The bytes of the token are collected in the shared Token
 * instance, whose length is reset at the start of every call.
 *
 * NOTE(review): end of input is only tested explicitly right after a '/';
 * elsewhere a -1 from the reader reaches the character-class lookup.
 *
 * @return the tokenizer's Token instance, updated to describe the token read
 * @throws InvalidFontException if the input ends directly after a '/', or
 *         if a helper rejects the token
 */
public Token getNextPSToken()
throws InvalidFontException,
IOException,
IndexOutOfBoundsException
{
  token.tokenLength = 0;

  // skip whitespace and comments in front of the token
  int first = reader.read(fontInputStream);
  while (isWhite(first) || first == '%')
  {
    if (first == '%')
    {
      skipComment();
    }
    first = reader.read(fontInputStream);
  }

  // 'first' is the start of the token to be returned.
  addByte((byte)first);
  switch (first)
  {
  case '/':
    {
      int second = reader.read(fontInputStream);
      if (second == -1)
      {
        throw new InvalidFontException("unexpected end of token");
      }
      else if (second == '/')
      {
        addByte((byte)second);
        token.tokenType = TokenType.kIMMEDIATE;
      }
      else
      {
        token.tokenType = TokenType.kLITERAL;
        // push it back rather than appending it: the name may be empty
        reader.unreadLast(fontInputStream);
      }
      skipToDelimiter();
      break;
    }
  case '{':
    skipProcedure();
    token.tokenType = TokenType.kPROCEDURE;
    break;
  case '(':
    skipString();
    token.tokenType = TokenType.kSTRING;
    break;
  case '[':
    skipArray();
    token.tokenType = TokenType.kARRAY;
    break;
  case '<':
    token.tokenType = skipAngle();
    break;
  case '0': case '1': case '2': case '3': case '4':
  case '5': case '6': case '7': case '8': case '9':
  case '.': case '+': case '-':
    token.tokenType = skipNumber(first);
    break;
  default:
    skipToDelimiter();
    token.tokenType = TokenType.kOPERATOR;
    break;
  }
  return token;
}
/**
 * Reads tokens until one matches tokenToFind. The loop has no other exit:
 * if no token matches, it ends only when getNextPSToken() throws.
 *
 * @param tokenToFind the bytes of the token to look for
 */
public void findToken(byte[] tokenToFind)
throws IOException, InvalidFontException
{
  while (!getNextPSToken().matches(tokenToFind))
  {
    // keep scanning
  }
}
/**
 * Scans the stream line by line for a line whose first token is one of
 * the candidates in tokenToFind.
 *
 * Each round first advances past the next line terminator and then reads
 * one token. The search gives up, returning null, when reading that token
 * raises an InvalidFontException or when the token reports isEOL().
 *
 * @param tokenToFind the candidate tokens, each as an array of bytes
 * @return the element of tokenToFind that was found, or null if none of
 *         the tokens was found
 * @throws IOException
 * @throws InvalidFontException
 */
public byte[] findOptionalTokensAtStartOfLine(byte[][] tokenToFind)
throws IOException, InvalidFontException
{
  for (;;)
  {
    gotoNextLine();

    Token candidate;
    try
    {
      candidate = getNextPSToken();
    }
    catch (InvalidFontException e)
    {
      // If we hit an EOF, stop
      return null;
    }

    if (candidate.isEOL())
    {
      return null;
    }

    int index = 0;
    while (index < tokenToFind.length)
    {
      byte[] wanted = tokenToFind[index++];
      if (candidate.matches(wanted))
      {
        return wanted;
      }
    }
  }
}
/**
 * Reads one value from the stream through the current reader, bypassing
 * tokenization.
 *
 * @return whatever the current reader returns for the next read
 */
public int read()
throws IOException, InvalidFontException
{
  final int next = reader.read(fontInputStream);
  return next;
}
/**
 * Replaces the reader used to fetch bytes from the stream.
 *
 * @param newReader the reader to use from now on
 * @throws InvalidFontException if newReader is of exactly the same class
 *         as the reader currently in use
 */
public void setReader(Reader newReader)
throws InvalidFontException
{
  if (reader.getClass() != newReader.getClass())
  {
    reader = newReader;
    return;
  }
  throw new InvalidFontException("eexec done twice?");
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy