com.adobe.fontengine.font.postscript.Tokenizer Maven / Gradle / Ivy

Go to download
/*
 *
 *	File: Tokenizer.java
 *
 * ****************************************************************************
 *
 *	ADOBE CONFIDENTIAL
 *	___________________
 *
 *	Copyright 2004-2005 Adobe Systems Incorporated
 *	All Rights Reserved.
 * 
 *	NOTICE: All information contained herein is, and remains the property of
 *	Adobe Systems Incorporated and its suppliers, if any. The intellectual
 *	and technical concepts contained herein are proprietary to Adobe Systems
 *	Incorporated and its suppliers and may be covered by U.S. and Foreign
 *	Patents, patents in process, and are protected by trade secret or
 *	copyright law. Dissemination of this information or reproduction of this
 *	material is strictly forbidden unless prior written permission is obtained
 *	from Adobe Systems Incorporated.
 *
 */
package com.adobe.fontengine.font.postscript;

import java.io.IOException;
import com.adobe.fontengine.font.InvalidFontException;
import com.adobe.fontengine.font.FontInputStream;

/**
 * A PostScript tokenizer.
 *
 * Synchronization
 * 
 * This class is NOT thread-safe. Multiple instances can safely
 * coexist without thread-safety issues, but each must only be accessed 
 * from one thread (or must be guarded by the client).
 */
final public class Tokenizer {
    /* Stream the bytes come from; it is handed to the current reader on every read/unread call. */
    private FontInputStream fontInputStream;
    /* The single Token instance that getNextPSToken() resets, refills and returns on each call. */
    private Token token;
    /* Object that actually pulls bytes from fontInputStream; starts as a PlainReader and can be replaced via setReader(). */
    private Reader reader;
    
    /*
     * Selected PostScript lexical classes.
     * Each class is a distinct bit so that a single character can belong to
     * several classes at once (for example newline characters are W|N).
     * D and P only appear in the lexicalClass table; no predicate in this
     * class tests them (digits are recognised through the digit table).
     */
    private static final int N= 1;	/* Newline (\n \r) */
    private static final int W = 2;	/* Whitespace (\0 \t \n \f \r space) */ 
    private static final int S = 4;	/* Special (delimiter: ( ) < > [ ] { } / %) */
    private static final int D = 8;	/* Decimal digit (0-9)*/
    private static final int P = 16;	/* Decimal point (period) */
    private static final int G = 32;	/* Sign (+ -) */
    private static final int E = 64;	/* Exponent (E e) */
    /*
     * Index by ascii character and return lexical class(es).
     * The table has exactly 256 entries (one per unsigned byte value), eight
     * per row; the trailing comment on each row gives the hex range of the
     * character codes it covers. Entries for 0x80-0xff are all 0 (no class).
     */
    private static final int lexicalClass[] =
    	{
            W,		0,		0,		0,		0,		0,		0,		0,	 /* 00-07 */
            0,		W,		W|N,	0,	   	W, 		W|N,	0,		0,	 /* 08-0f */
    		0,		0,		0,		0,		0,		0,		0,		0,	 /* 10-17 */
    		0,		0,		0,		0,		0,		0,		0,		0,	 /* 18-1f */
    		W,		0,		0,		0,		0,		S,		0,		0,	 /* 20-27 */
    		S,		S,		0,		G,		0,		G,		P,		S,	 /* 28-2f */
    		D,		D,		D,		D,		D,		D,		D,		D,	 /* 30-37 */
    		D,		D,		0,		0,		S,		0,		S,		0,	 /* 38-3f */
    		0,		0,		0,		0,		0,		E,		0,		0,	 /* 40-47 */
    	    0,		0,		0,		0,		0,		0,		0,		0,	 /* 48-4f */
    	    0,		0,		0,		0,		0,		0,		0,		0,	 /* 50-57 */
    	    0,		0,		0,		S,		0,		S,		0,		0,	 /* 58-5f */
    		0,		0,		0,		0,		0,		E,		0,		0,	 /* 60-67 */
    	    0,		0,		0,		0,		0,		0,		0,		0,	 /* 68-6f */
    	    0,		0,		0,		0,		0,		0,		0,		0,	 /* 70-77 */
    	    0,		0,		0,		S,		0,		S,		0,		0,	 /* 78-7f */
    	    0,		0,		0,		0,		0,		0,		0,		0,	 /* 80-87 */
    	    0,		0,		0,		0,		0,		0,		0,		0,	 /* 88-8f */
    	    0,		0,		0,		0,		0,		0,		0,		0,	 /* 90-97 */
    	    0,		0,		0,		0,		0,		0,		0,		0,	 /* 98-9f */
    	    0,		0,		0,		0,		0,		0,		0,		0,	 /* a0-a7 */
    	    0,		0,		0,		0,		0,		0,		0,		0,	 /* a8-af */
    	    0,		0,		0,		0,		0,		0,		0,		0,	 /* b0-b7 */
    	    0,		0,		0,		0,		0,		0,		0,		0,	 /* b8-bf */
    	    0,		0,		0,		0,		0,		0,		0,		0,	 /* c0-c7 */
    	    0,		0,		0,		0,		0,		0,		0,		0,	 /* c8-cf */
    	    0,		0,		0,		0,		0,		0,		0,		0,	 /* d0-d7 */
    	    0,		0,		0,		0,		0,		0,		0,		0,	 /* d8-df */
    	    0,		0,		0,		0,		0,		0,		0,		0,	 /* e0-e7 */
    	    0,		0,		0,		0,		0,		0,		0,		0,	 /* e8-ef */
    	    0,		0,		0,		0,		0,		0,		0,		0,	 /* f0-f7 */
    	    0,		0,		0,		0,		0,		0,		0,		0,	 /* f8-ff */

    	};
    
    /*
     * Index by ascii char and return digit value (to radix 36) or error (99).
     * '0'-'9' map to 0-9; 'A'-'Z' and 'a'-'z' both map to 10-35, so letter
     * digits are case-insensitive. The table has 256 entries, sixteen per
     * row; every code that is not an ASCII letter or digit maps to 99.
     */
    static final byte digit[] =
    	{
         99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,/* 00-0f */
         99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,/* 10-1f */
         99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,/* 20-2f */
          0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 99, 99, 99, 99, 99, 99,/* 30-3f */
         99, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,/* 40-4f */
         25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 99, 99, 99, 99, 99,/* 50-5f */
         99, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,/* 60-6f */
         25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 99, 99, 99, 99, 99,/* 70-7f */
         99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,/* 80-8f */
         99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,/* 90-9f */
         99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,/* a0-af */
         99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,/* b0-bf */
         99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,/* c0-cf */
         99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,/* d0-df */
         99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,/* e0-ef */
         99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99,/* f0-ff */
    	};

    /*
     * Lexical-class predicates. Each one looks the character up in the
     * 256-entry lexicalClass table and tests one or more class bits.
     * The argument is used directly as the array index, so a value outside
     * 0..255 (including -1) raises ArrayIndexOutOfBoundsException.
     */

    /** @return true if c is PostScript whitespace (class W). */
    public static final boolean isWhite(int c)
    {
        final int classes = lexicalClass[c];
        return 0 != (classes & W);
    }

    /** @return true if c is a newline character (class N). */
    public static final boolean isNewLine(int c)
    {
        final int classes = lexicalClass[c];
        return 0 != (classes & N);
    }

    /** @return true if c ends a token, i.e. it is a special character or whitespace (class S or W). */
    public static final boolean isDelimiter(int c)
    {
        final int delimiterMask = S | W;
        return 0 != (lexicalClass[c] & delimiterMask);
    }

    /** @return true if c is a sign character (class G). */
    public static final boolean isSign(int c)
    {
        final int classes = lexicalClass[c];
        return 0 != (classes & G);
    }

    /** @return true if c is an exponent marker (class E). */
    public static final boolean isExponent(int c)
    {
        final int classes = lexicalClass[c];
        return 0 != (classes & E);
    }
 
    /*
     * Digit predicates, all driven by the 256-entry digit table. The
     * argument is used directly as the array index, so a value outside
     * 0..255 raises ArrayIndexOutOfBoundsException.
     */

    /** @return true if c is a decimal digit (digit value below 10). */
    public static final boolean isDigit(int c)
    {
        return isRadix(c, 10);
    }

    /** @return true if c is a hexadecimal digit (digit value below 16). */
    public static final boolean isHex(int c)
    {
        return isRadix(c, 16);
    }

    /**
     * @param c the character to test
     * @param b the radix
     * @return true if the digit value of c is strictly less than b
     */
    public static final boolean isRadix(int c, int b)
    {
        final int value = digit[c];
        return value < b;
    }
    
    /**
     * Returns the radix-36 digit value of a character given as a raw byte.
     *
     * The byte is treated as unsigned: a Java byte holding 0x80-0xff is
     * negative, and using it directly as an index would fall outside the
     * 256-entry table. Masking with 0xff selects the intended upper-half
     * entry (all of which are 99) instead.
     *
     * @param c the character, as a byte
     * @return 0-35 when c is an ASCII digit or letter, 99 otherwise
     */
    public static final byte digitValue(byte c) { return digit[c & 0xff]; }

    /**
     * Creates a tokenizer over the given stream. The tokenizer starts out
     * with a PlainReader and a fresh, reusable Token.
     *
     * @param is the stream the tokens are read from
     */
    public Tokenizer(FontInputStream is)
    {
        this.fontInputStream = is;
        this.reader = new PlainReader();
        this.token = new Token();
    }
    
    /**
     * Appends one byte to the current token, enlarging the token buffer
     * when it is full.
     *
     * The buffer at least doubles each time it grows (and always gains at
     * least 256 bytes), so building a long token such as a large procedure
     * or dictionary copies each byte an amortised constant number of times
     * rather than once per 256 bytes appended.
     *
     * @param newByte the byte to append
     */
    private void addByte(byte newByte)
    {
        if (token.tokenLength >= token.buff.length)
        {
            // not enough room...grow the buffer.
            int newCapacity = Math.max(token.tokenLength + 256, token.buff.length * 2);
            byte[] newBuffer = new byte[newCapacity];

            System.arraycopy(token.buff, 0, newBuffer, 0, token.tokenLength);
            token.buff = newBuffer;
        }

        token.buff[token.tokenLength++] = newByte;
    }
    
    /**
     * Discards input up to the end of the current line. The newline
     * character that ends the comment is pushed back, so the caller reads
     * it next. Nothing is added to the token.
     *
     * NOTE(review): there is no explicit end-of-stream test here; if the
     * reader returns -1, isNewLine(-1) indexes its table out of range.
     * Confirm whether that is the intended way to surface end of stream.
     */
    private void skipComment()
        throws IOException, InvalidFontException
    {
        int current;
        do
        {
            current = reader.read(fontInputStream);
        }
        while (!isNewLine(current));

        reader.unreadLast(fontInputStream);
    }
    
    /**
     * Consumes the remainder of a parenthesised string whose opening '('
     * has already been read, appending every byte (including the closing
     * ')') to the current token. Nested, unescaped parentheses are
     * balanced; a backslash causes the following byte to be copied without
     * being interpreted.
     *
     * @throws InvalidFontException if the stream ends (read returns -1)
     *         before the string is closed. Without this check the loop
     *         would keep appending the truncated -1 value forever.
     */
    private void skipString()
        throws IOException, InvalidFontException
    {
        int depth = 1;	/* Already seen '(' */
        while (depth > 0)
        {
            int c = reader.read(fontInputStream);
            if (c == -1)
                throw new InvalidFontException("unterminated string");

            addByte((byte)c);

            switch (c)
            {
            case '\\':
                /* Copy the escaped character without interpreting it */
                c = reader.read(fontInputStream);
                if (c == -1)
                    throw new InvalidFontException("unterminated string");
                addByte((byte)c);
                break;

            case '(':
                depth++;
                break;

            case ')':
                depth--;
                break;

            default:
                break;
            }
        }
    }
    
    /**
     * Consumes the remainder of an array whose opening '[' has already
     * been read, appending its bytes (including the closing ']') to the
     * current token. Nested brackets are balanced, embedded parenthesised
     * strings are consumed as a unit so that brackets inside them are not
     * counted, and comments are dropped from the token.
     *
     * @throws InvalidFontException if the stream ends (read returns -1)
     *         before the array is closed. Without this check the loop
     *         would keep appending the truncated -1 value forever.
     */
    private void skipArray()
        throws IOException, InvalidFontException
    {
        int depth = 1;	/* Already seen '[' */
        while (depth > 0)
        {
            final int c = reader.read(fontInputStream);
            if (c == -1)
                throw new InvalidFontException("unterminated array");

            if (c == '%')
            {
                /* Comment text is not part of the token */
                skipComment();
                continue;
            }

            addByte((byte)c);

            switch (c)
            {
            case '(':
                skipString();
                break;

            case '[':
                depth++;
                break;

            case ']':
                depth--;
                break;

            default:
                break;
            }
        }
    }
    
    /**
     * Consumes the remainder of a procedure whose opening '{' has already
     * been read, appending its bytes (including the closing '}') to the
     * current token. Nested braces are balanced, embedded parenthesised
     * strings are consumed as a unit so that braces inside them are not
     * counted, and comments are dropped from the token.
     *
     * @throws InvalidFontException if the stream ends (read returns -1)
     *         before the procedure is closed. Without this check the loop
     *         would keep appending the truncated -1 value forever.
     */
    private void skipProcedure()
        throws IOException, InvalidFontException
    {
        int depth = 1;	/* Already seen '{' */
        while (depth > 0)
        {
            final int c = reader.read(fontInputStream);
            if (c == -1)
                throw new InvalidFontException("unterminated procedure");

            if (c == '%')
            {
                /* Comment text is not part of the token */
                skipComment();
                continue;
            }

            addByte((byte)c);

            switch (c)
            {
            case '(':
                skipString();
                break;

            case '{':
                depth++;
                break;

            case '}':
                depth--;
                break;

            default:
                break;
            }
        }
    }
    
    /**
     * Consumes the remainder of a dictionary whose opening "<<" has
     * already been read, appending its bytes (including the closing ">>")
     * to the current token. Embedded strings and angle-bracket constructs
     * (nested dictionaries, hex and ASCII85 strings) are consumed as units;
     * comments are dropped from the token.
     *
     * @throws InvalidFontException if the stream ends (read returns -1)
     *         before the closing ">>" is seen. Without this check the loop
     *         would keep appending the truncated -1 value forever.
     */
    private void skipDictionary()
        throws IOException, InvalidFontException
    {
        for (;;)
        {
            int c = reader.read(fontInputStream);
            if (c == -1)
                throw new InvalidFontException("unterminated dictionary");

            if (c == '%')
            {
                /* Comment text is not part of the token */
                skipComment();
                continue;
            }

            addByte((byte)c);

            switch (c)
            {
            case '>':
                c = reader.read(fontInputStream);
                if (c == -1)
                    throw new InvalidFontException("unterminated dictionary");
                if (c == '>')
                {
                    addByte((byte)c);
                    return;
                }

                /* A lone '>': let the main loop process the byte after it */
                reader.unreadLast(fontInputStream);
                break;

            case '(':
                skipString();
                break;

            case '<':
                skipAngle();
                break;

            default:
                break;
            }
        }
    }
    
    /**
     * Consumes the construct that follows an already-read '<' and appends
     * its bytes to the current token. The byte after the '<' selects the
     * construct:
     * "<<" starts a dictionary, "<~" starts an ASCII85 string terminated
     * by "~>", and anything else is treated as a hexadecimal string
     * terminated by '>'.
     *
     * The empty hex string "<>" is accepted: the closing '>' is checked
     * before a byte is validated as a hex digit, so it is never itself
     * rejected as an invalid digit.
     *
     * @return kDICTIONARY, kASCII85 or kHEXSTRING according to the
     *         construct that was consumed
     * @throws InvalidFontException if a hex or ASCII85 string contains a
     *         character that is not allowed in it
     */
    private TokenType skipAngle()
        throws IOException, InvalidFontException
    {
        int c = reader.read(fontInputStream);

        switch (c)
        {
        case '<':
            addByte((byte)c);
            skipDictionary();
            return TokenType.kDICTIONARY;

        case '~':
            addByte((byte)c);

            /* ASCII 85 string */
            for (;;)
            {
                c = reader.read(fontInputStream);
                addByte((byte)c);

                if (c == '~')
                {
                    /* Only "~>" ends the string; any other follower is kept and scanning goes on */
                    c = reader.read(fontInputStream);
                    addByte((byte)c);

                    if (c == '>')
                        return TokenType.kASCII85;
                }
                else if ((c < '!' || c > 'u') && !isWhite(c) && c != 'z')
                {
                    throw new InvalidFontException("invalid ascii85 string");
                }
            }

        default:
            addByte((byte)c);

            /* Skip hexadecimal string; c is always the most recently added byte */
            while (c != '>')
            {
                if (!isHex(c) && !isWhite(c))
                {
                    throw new InvalidFontException("invalid hex string");
                }

                c = reader.read(fontInputStream);
                addByte((byte)c);
            }

            return TokenType.kHEXSTRING;
        }
    }
    
    /**
     * Scans a token that starts like a number and classifies it.
     *
     * The scan is a small state machine. The first character (passed in as
     * c, and already added to the token by getNextPSToken) selects the
     * initial state; each further character is read, appended to the
     * token and used to pick the next state. When a delimiter is read it
     * is pushed back and the state reached so far decides the result. As
     * soon as a character does not fit the number syntax the rest of the
     * token is consumed up to the next delimiter and reported as an
     * operator.
     *
     * States (d = one or more digits):
     *   1  d                          6  [+-]d
     *   2  [+-]                       7  fraction digits seen
     *   3  [+-]?.                     8  ...[Ee]
     *   4  d.                         9  d#d
     *   5  d#                        10  ...[Ee][+-]?d
     *                                11  ...[Ee][+-]
     *
     * NOTE(review): in state 1 an exponent marker is not accepted, so a
     * token such as 1E6 is reported as kOPERATOR - confirm this is intended.
     *
     * @param c the first character of the token
     * @return kINTEGER, kREAL, or kOPERATOR when the token is not a
     *         well-formed number
     */
    private TokenType skipNumber(int c)
    	throws IOException, InvalidFontException
	{
		int state;
		boolean operatorFound = false;
	
		/* Determine initial state */
		if (isDigit(c))
			state = 1;
		else if (isSign(c))
			state = 2;
		else if (c == '.')
			state = 3;
		else
		{
			/* Not a number start at all: go straight to the operator path */
		    state = 0;
			operatorFound = true;
		}
	
		while (!operatorFound)
		{
			c = reader.read(fontInputStream);
			
			/* Every state 1-11 returns from this switch, so a delimiter always ends the scan */
			if (isDelimiter(c))
				/* Determine token type by examining finish state */
				switch (state)
				{
				case 2:		/* [+-] */
				case 3:		/* [+-]?\. */
				case 5:		/* d# */
				case 8:		/* (d.|.d|d.d)[Ee] */
				case 11:	/* (d.|.d|d.d)[Ee][+-] */
					/* Nearly, but not quite, a number */
	    	        reader.unreadLast(fontInputStream);
					return TokenType.kOPERATOR;
					
				case 1:		/* d */
				case 6:		/* [+-]d */
				case 9:		/* d#d */
	    	        reader.unreadLast(fontInputStream);
					return TokenType.kINTEGER;
					
				case 4:		/* d. */
				case 7:		/* .d  d.d	[+-](.d|d.|d.d) */
				case 10:	/* [+-]{0,1}(.d|d.|d.d)[Ee][+-]?d */
				    reader.unreadLast(fontInputStream);
					return TokenType.kREAL;
				}
	
			/* Not a delimiter: the character belongs to the token whatever it turns out to be */
			addByte((byte)c);
			
			/* Determine next state */
			switch (state)
			{
			case 1:
				if (c == '.')
					state = 4;
				else if (c == '#')
					state = 5;
				else if (!isDigit(c))
				    operatorFound = true;
				break;
			case 2:
				if (isDigit(c))
					state = 6;
				else if (c == '.')
					state = 3;
				else
				    operatorFound = true;
				break;
			case 3:
				if (isDigit(c))
					state = 7;
				else
				    operatorFound = true;
				break;
			case 4:
				if (isDigit(c))
					state = 7;
				else if (isExponent(c))
					state = 8;
				else
				    operatorFound = true;
				break;
			case 5:
			    // this just makes sure it could be a valid radix...not that it is.
				if (isRadix(c, 36))
					state = 9;
				else
				    operatorFound = true;
				break;
			case 6:
				if (c == '.')
					state = 7;
				else if (!isDigit(c))
				    operatorFound = true;
				break;
			case 7:
				if (isExponent(c))
					state = 8;
				else if (!isDigit(c))
				    operatorFound = true;
				break;
			case 8:
				if (isDigit(c))
					state = 10;
				else if (isSign(c))
					state = 11;
				else
				    operatorFound = true;
				break;
			case 9:
				if (!isRadix(c, 36))
				    operatorFound = true;
				break;
			case 10:
				if (!isDigit(c))
				    operatorFound = true;
				break;
			case 11:
				if (isDigit(c))
					state = 10;
				else
				    operatorFound = true;
				break;
			}
		}
	
		/* Non-numeric character encountered, skip to delimiter */
		skipToDelimiter();
		return TokenType.kOPERATOR;
	}
    
    /**
     * Appends input bytes to the current token until a delimiter or the
     * end of the stream (a read result of -1) is reached. A delimiter is
     * pushed back so that it starts the next token; at end of stream
     * nothing is pushed back.
     */
    private void skipToDelimiter()
        throws IOException, InvalidFontException
    {
        int current = reader.read(fontInputStream);
        while (current != -1 && !isDelimiter(current))
        {
            addByte((byte)current);
            current = reader.read(fontInputStream);
        }

        if (current == -1)
            return;

        reader.unreadLast(fontInputStream);
    }
    
    /**
     * Discards input up to and including the next newline character, or up
     * to the end of the stream (a read result of -1) if no newline is
     * found. Nothing is added to the token and nothing is pushed back.
     */
    private void gotoNextLine()
        throws  InvalidFontException,
            IOException, 
            IndexOutOfBoundsException
    {
        int current;
        do
        {
            current = reader.read(fontInputStream);
        }
        while (current != -1 && !isNewLine(current));
    }
    
    /**
     * Reads the next PostScript token from the stream.
     *
     * Leading whitespace and comments are discarded. The first remaining
     * character decides how the token is scanned and which type it is
     * given: '/' starts a literal name ("//" an immediate name), '{' a
     * procedure, '(' a string, '[' an array, '<' a dictionary, hex string
     * or ASCII85 string, and a digit, '.', '+' or '-' a number candidate.
     * Anything else is scanned up to the next delimiter and typed as an
     * operator.
     *
     * The returned object is this tokenizer's single Token instance; it is
     * reset and overwritten by the next call.
     *
     * @return the token that was read
     * @throws InvalidFontException if the stream ends directly after a '/',
     *         or a nested scanner rejects the input
     */
    public Token getNextPSToken()
        throws  InvalidFontException,
        IOException, 
        IndexOutOfBoundsException
    {
        token.tokenLength = 0;

        // Discard whitespace and comments until the first byte of a token shows up.
        int lead;
        while (true)
        {
            lead = reader.read(fontInputStream);

            if (isWhite(lead))
                continue;

            if (lead != '%')
                break;

            skipComment();
        }

        // lead is the start of the token to be returned.
        addByte((byte)lead);

        if (lead == '/')
        {
            final int following = reader.read(fontInputStream);

            if (following == -1)
                throw new InvalidFontException("unexpected end of token");

            if (following == '/')
            {
                addByte((byte)following);
                token.tokenType = TokenType.kIMMEDIATE;
            }
            else
            {
                token.tokenType = TokenType.kLITERAL;
                // pushed back rather than added, since the name may be empty
                reader.unreadLast(fontInputStream);
            }

            skipToDelimiter();
        }
        else if (lead == '{')
        {
            skipProcedure();
            token.tokenType = TokenType.kPROCEDURE;
        }
        else if (lead == '(')
        {
            skipString();
            token.tokenType = TokenType.kSTRING;
        }
        else if (lead == '[')
        {
            skipArray();
            token.tokenType = TokenType.kARRAY;
        }
        else if (lead == '<')
        {
            token.tokenType = skipAngle();
        }
        else if ((lead >= '0' && lead <= '9') || lead == '.' || lead == '+' || lead == '-')
        {
            token.tokenType = skipNumber(lead);
        }
        else
        {
            skipToDelimiter();
            token.tokenType = TokenType.kOPERATOR;
        }

        return token;
    }
    
    
    /**
     * Reads tokens until one matches tokenToFind. At least one token is
     * always read. The loop has no other exit condition; it ends early
     * only if getNextPSToken throws.
     *
     * @param tokenToFind the bytes of the token to look for
     */
    public void findToken(byte[] tokenToFind)
        throws IOException, InvalidFontException
    {
        Token candidate = getNextPSToken();
        while (!candidate.matches(tokenToFind))
        {
            candidate = getNextPSToken();
        }
    }
    
    /**
     * Looks for one of the given tokens as the first token of a line.
     *
     * Each round first discards the rest of the current line, then reads
     * one token and compares it with every candidate in order. The search
     * stops without a match when reading the token raises an
     * InvalidFontException (treated as end of input) or when the token
     * reports isEOL().
     *
     * @param tokenToFind the candidate tokens, each as an array of bytes
     * @return the element in tokenToFind that was found, or null if none of the tokens was found.
     * @throws IOException
     * @throws InvalidFontException
     */
    public byte[] findOptionalTokensAtStartOfLine(byte[][] tokenToFind)
    throws IOException, InvalidFontException
    {
        for (;;)
        {
            gotoNextLine();

            Token candidate;
            try
            {
                candidate = getNextPSToken();
            }
            catch (InvalidFontException e)
            {
                // If we hit an EOF, stop
                return null;
            }

            if (candidate.isEOL())
            {
                return null;
            }

            int index = 0;
            while (index < tokenToFind.length)
            {
                if (candidate.matches(tokenToFind[index]))
                {
                    return tokenToFind[index];
                }
                index++;
            }
        }
    }
    
    
    /**
     * Reads the next value from the stream through the current reader,
     * bypassing tokenization.
     *
     * @return whatever the current reader returns for the next read
     *         (other methods in this class treat -1 as end of stream)
     */
    public int read() 
        throws IOException, InvalidFontException
    {
        final int value = reader.read(fontInputStream);
        return value;
    }
    
    /**
     * Replaces the reader used to pull bytes from the stream.
     *
     * @param newReader the reader to use from now on
     * @throws InvalidFontException if newReader is of exactly the same
     *         class as the reader currently installed
     */
    public void setReader(Reader newReader)
        throws InvalidFontException
    {
        final boolean sameKind = reader.getClass() == newReader.getClass();

        if (!sameKind)
        {
            reader = newReader;
            return;
        }

        throw new InvalidFontException("eexec done twice?");
    }
  
}