All Downloads are FREE. Search and download functionalities are using the official Maven repository.

antlr.TokenStreamRewriteEngine Maven / Gradle / Ivy

There is a newer version: 3.3.8
Show newest version
package antlr;

/* ANTLR Translator Generator
 * Project led by Terence Parr at http://www.cs.usfca.edu
 * Software rights: http://www.antlr.org/license.html
 */

import antlr.ASdebug.ASDebugStream;
import antlr.ASdebug.IASDebugStream;
import antlr.ASdebug.TokenOffsetInfo;
import antlr.collections.impl.BitSet;

import java.util.*;

/** This token stream tracks the *entire* token stream coming from
 *  a lexer, but does not pass on the whitespace (or whatever else
 *  you want to discard) to the parser.
 *
 *  This class can then be asked for the ith token in the input stream.
 *  Useful for dumping out the input stream exactly after doing some
 *  augmentation or other manipulations.  Tokens are index from 0..n-1
 *
 *  You can insert stuff, replace, and delete chunks.  Note that the
 *  operations are done lazily--only if you convert the buffer to a
 *  String.  This is very efficient because you are not moving data around
 *  all the time.  As the buffer of tokens is converted to strings, the
 *  toString() method(s) check to see if there is an operation at the
 *  current index.  If so, the operation is done and then normal String
 *  rendering continues on the buffer.  This is like having multiple Turing
 *  machine instruction streams (programs) operating on a single input tape. :)
 *
 *  Since the operations are done lazily at toString-time, operations do not
 *  screw up the token index values.  That is, an insert operation at token
 *  index i does not change the index values for tokens i+1..n-1.
 *
 *  Because operations never actually alter the buffer, you may always get
 *  the original token stream back without undoing anything.  Since
 *  the instructions are queued up, you can easily simulate transactions and
 *  roll back any changes if there is an error just by removing instructions.
 *  For example,
 *
 * 		TokenStreamRewriteEngine rewriteEngine =
 * 			new TokenStreamRewriteEngine(lexer);
 *      JavaRecognizer parser = new JavaRecognizer(rewriteEngine);
 *      ...
 *      rewriteEngine.insertAfter("pass1", t, "foobar");}
 * 		rewriteEngine.insertAfter("pass2", u, "start");}
 * 		System.out.println(rewriteEngine.toString("pass1"));
 * 		System.out.println(rewriteEngine.toString("pass2"));
 *
 *  You can also have multiple "instruction streams" and get multiple
 *  rewrites from a single pass over the input.  Just name the instruction
 *  streams and use that name again when printing the buffer.  This could be
 *  useful for generating a C file and also its header file--all from the
 *  same buffer.
 *
 *  If you don't use named rewrite streams, a "default" stream is used.
 *
 *  Terence Parr, parrt at antlr.org
 *  University of San Francisco
 *  February 2004
 */
public class TokenStreamRewriteEngine implements TokenStream, IASDebugStream {
	public static final int MIN_TOKEN_INDEX = 0;

	static class RewriteOperation {
		protected int index;
		protected String text;
		protected RewriteOperation(int index, String text) {
			this.index = index;
			this.text = text;
		}
		/** Execute the rewrite operation by possibly adding to the buffer.
		 *  Return the index of the next token to operate on.
		 */
		public int execute(StringBuffer buf) {
			return index;
		}
		public String toString() {
			String opName = getClass().getName();
			int $index = opName.indexOf('$');
			opName = opName.substring($index+1, opName.length());
			return opName+"@"+index+'"'+text+'"';
		}
	}

	static class InsertBeforeOp extends RewriteOperation {
		public InsertBeforeOp(int index, String text) {
			super(index,text);
		}
		public int execute(StringBuffer buf) {
			buf.append(text);
			return index;
		}
	}

	/** I'm going to try replacing range from x..y with (y-x)+1 ReplaceOp
	 *  instructions.
	 */
	static class ReplaceOp extends RewriteOperation {
		protected int lastIndex;
		public ReplaceOp(int from, int to, String text) {
			super(from,text);
			lastIndex = to;
		}
		public int execute(StringBuffer buf) {
			if ( text!=null ) {
				buf.append(text);
			}
			return lastIndex+1;
		}
	}

	static class DeleteOp extends ReplaceOp {
		public DeleteOp(int from, int to) {
			super(from, to, null);
		}
	}

	public static final String DEFAULT_PROGRAM_NAME = "default";
    public static final int PROGRAM_INIT_SIZE = 100;

	/** Track the incoming list of tokens */
	protected List tokens;

	/** You may have multiple, named streams of rewrite operations.
	 *  I'm calling these things "programs."
	 *  Maps String (name) -> rewrite (List)
	 */
	protected Map programs = null;

	/** Map String (program name) -> Integer index */
	protected Map lastRewriteTokenIndexes = null;

	/** track index of tokens */
	protected int index = MIN_TOKEN_INDEX;

	/** Who do we suck tokens from? */
	protected TokenStream stream;

	/** Which (whitespace) token(s) to throw out */
	protected BitSet discardMask = new BitSet();

	public TokenStreamRewriteEngine(TokenStream upstream) {
		this(upstream,1000);
	}

	public TokenStreamRewriteEngine(TokenStream upstream, int initialSize) {
		stream = upstream;
		tokens = new ArrayList(initialSize);
		programs = new HashMap();
		programs.put(DEFAULT_PROGRAM_NAME,
							   new ArrayList(PROGRAM_INIT_SIZE));
		lastRewriteTokenIndexes = new HashMap();
	}

	public Token nextToken() throws TokenStreamException {
		TokenWithIndex t;
		// suck tokens until end of stream or we find a non-discarded token
		do {
           	t = (TokenWithIndex)stream.nextToken();
			if ( t!=null ) {
				t.setIndex(index);  // what is t's index in list?
				if ( t.getType()!=Token.EOF_TYPE ) {
					tokens.add(t);  // track all tokens except EOF
				}
				index++;			// move to next position
			}
		} while ( t!=null && discardMask.member(t.getType()) );
		return t;
	}

	public void rollback(int instructionIndex) {
		rollback(DEFAULT_PROGRAM_NAME, instructionIndex);
	}

	/** Rollback the instruction stream for a program so that
	 *  the indicated instruction (via instructionIndex) is no
	 *  longer in the stream.  UNTESTED!
	 */
	public void rollback(String programName, int instructionIndex) {
		List is = (List)programs.get(programName);
		if ( is!=null ) {
			programs.put(programName, is.subList(MIN_TOKEN_INDEX,instructionIndex));
		}
	}

	public void deleteProgram() {
		deleteProgram(DEFAULT_PROGRAM_NAME);
	}

	/** Reset the program so that no instructions exist */
	public void deleteProgram(String programName) {
		rollback(programName, MIN_TOKEN_INDEX);
	}

	/** If op.index > lastRewriteTokenIndexes, just add to the end.
	 *  Otherwise, do linear */
	protected void addToSortedRewriteList(RewriteOperation op) {
		addToSortedRewriteList(DEFAULT_PROGRAM_NAME, op);
	}

	/** old; before moving v3 stuff in
	protected void addToSortedRewriteList(String programName, RewriteOperation op) {
		List rewrites = getProgram(programName);
		// if at or beyond last op's index, just append
		if ( op.index>=getLastRewriteTokenIndex(programName) ) {
			rewrites.add(op); // append to list of operations
			// record the index of this operation for next time through
			setLastRewriteTokenIndex(programName, op.index);
			return;
		}
		// not after the last one, so must insert to ordered list
		Comparator comparator = new Comparator() {
			public int compare(Object o, Object o1) {
				RewriteOperation a = (RewriteOperation)o;
				RewriteOperation b = (RewriteOperation)o1;
				if ( a.indexb.index ) return 1;
				return 0;
			}
		};
        int pos = Collections.binarySearch(rewrites, op, comparator);
		if ( pos<0 ) {
			rewrites.add(-pos-1, op);
		}
	}
*/

	/** Add an instruction to the rewrite instruction list ordered by
	 *  the instruction number (use a binary search for efficiency).
	 *  The list is ordered so that toString() can be done efficiently.
	 *
	 *  When there are multiple instructions at the same index, the instructions
	 *  must be ordered to ensure proper behavior.  For example, a delete at
	 *  index i must kill any replace operation at i.  Insert-before operations
	 *  must come before any replace / delete instructions.  If there are
	 *  multiple insert instructions for a single index, they are done in
	 *  reverse insertion order so that "insert foo" then "insert bar" yields
	 *  "foobar" in front rather than "barfoo".  This is convenient because
	 *  I can insert new InsertOp instructions at the index returned by
	 *  the binary search.  A ReplaceOp kills any previous replace op.  Since
	 *  delete is the same as replace with null text, i can check for
	 *  ReplaceOp and cover DeleteOp at same time. :)
	 */
	protected void addToSortedRewriteList(String programName, RewriteOperation op) {
		List rewrites = getProgram(programName);
		//System.out.println("### add "+op+"; rewrites="+rewrites);
		Comparator comparator = new Comparator() {
			public int compare(Object o, Object o1) {
				RewriteOperation a = (RewriteOperation)o;
				RewriteOperation b = (RewriteOperation)o1;
				if ( a.indexb.index ) return 1;
				return 0;
			}
		};
        int pos = Collections.binarySearch(rewrites, op, comparator);
		//System.out.println("bin search returns: pos="+pos);

		if ( pos>=0 ) {
			// binarySearch does not guarantee first element when multiple
			// are found.  I must seach backwards for first op with op.index
			for (; pos>=0; pos--) {
				RewriteOperation prevOp = (RewriteOperation)rewrites.get(pos);
				if ( prevOp.index=MIN_TOKEN_INDEX && i<=end && i




© 2015 - 2024 Weber Informatics LLC | Privacy Policy