org.modelcc.lexer.flex.Flex Maven / Gradle / Ivy

Go to download

Show more of this group Show more artifacts with this name
Show all versions of ModelCC Show documentation

ModelCC is a model-based parser generator (a.k.a. compiler compiler) that decouples language specification from language processing, avoiding some of the problems caused by grammar-driven parser generators. ModelCC receives a conceptual model as input, along with constraints that annotate it. It is then able to create a parser for the desired textual language, and the generated parser fully automates the instantiation of the language's conceptual model. ModelCC also includes a built-in reference resolution mechanism that results in abstract syntax graphs, rather than mere abstract syntax trees.

The newest version!

/*
 * ModelCC, distributed under ModelCC Shared Software License, www.modelcc.org
 */

package org.modelcc.lexer.flex;

import java.io.BufferedReader;
import java.io.Reader;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;

import org.modelcc.io.file.ReaderCharSequence;
import org.modelcc.language.lexis.LexicalSpecification;
import org.modelcc.language.lexis.TokenSpecification;
import org.modelcc.lexer.LexicalGraph;
import org.modelcc.lexer.Token;
import org.modelcc.lexer.Tokenizer;
import org.modelcc.lexer.recognizer.MatchedObject;

/**
 * FLex - Fast Lexer
 * 
 * @author Fernando Berzal ([email protected])
 */
public class Flex extends Tokenizer implements Serializable 
{
	/** 
	 * Input
	 */
	private ReaderCharSequence input;
        
    /**
     * Token specifications
     */
	private List stspecs;
	
	/**
	 * Current token
	 */
	private Token currentToken;
    
	/**
	 * Current token specification
	 */
	private TokenSpecification currentTS;
    
	/**
	 * Current input position
	 */
    int currentPosition = 0;

    
    /**
     * Constructor
     * @param lexis lexer specification
     * @param reader input reader
     */
    public Flex (LexicalSpecification lexis, Reader reader)
    {
    	super(lexis,reader);

    	stspecs = lexis.getTokenSpecifications();
    	input = new ReaderCharSequence( new BufferedReader(reader) );
        currentPosition = 0;
    }


    /**
     * Perform lexical analysis (a.k.a. scanning).
     * @return the lexical graph obtained from this tokenizer.
     */
    public LexicalGraph scan () 
    {                
    	List tokens = new ArrayList();    
    	Token token = nextToken();

        while (token!=null) {
       		tokens.add(token);
        	token = nextToken();
        }
        
        return buildLexicalGraph(tokens);
    }
    

    // Tokenizer interface
    // -------------------
  
    public Token nextToken ()
    {
    	Token token;
    	
    	do {
    		token = nextInputToken();
    	} while ((token!=null) && getCurrentTokenSpecification().isSkipped());
    	
    	return token;
    }
    
    public Token nextInputToken ()
    {
		currentToken = null;
		currentTS = null;

		for (TokenSpecification ts: stspecs) {  // Try to match all tokens
			
			MatchedObject match = ts.getRecognizer().read(input,currentPosition);
			
			if (match != null) {
				
				if (  ( currentToken==null) 
				   || ( currentToken.length() < match.getText().length())
				   || ( lexis.precedes(ts,currentTS) ) ) {
					
    				Token t = new Token(ts.getType(),match.getObject(),match.getText(),currentPosition,input.lineAt(currentPosition));

    				if (build(ts,t)) {
    					currentTS = ts;
    					currentToken = t;
    				}
				}
			}    			
		}

		if (currentToken!=null) {
			currentPosition = currentToken.getEndIndex() + 1;
			input.prune(currentToken.getEndIndex());
		}
		
		return currentToken;
    }
    
    
    // Getters
    // -------
    
    public TokenSpecification getCurrentTokenSpecification ()
    {
    	return currentTS;
    }
    
    public Token getCurrentToken ()
    {
    	return currentToken;
    }
    
    public int getCurrentPosition ()
    {
    	return currentPosition;
    }
    
    // Ancillary routines
    // ------------------

    private boolean build(TokenSpecification m,Token t) 
    {
    	Object obj = m.getBuilder().build(t);
    	
    	t.setUserData(obj);
    	
    	return (obj!=null);
    }
    
    // Lexical graph generation
    // ------------------------
    
    private LexicalGraph buildLexicalGraph (List tokens) 
    {
    	int inputStart = 0;
    	int inputEnd = input.length()-1;
    	
    	LexicalGraph graph = new LexicalGraph(tokens,inputStart,inputEnd);

    	for (int i=1; i0) 
    		graph.addStartToken(tokens.get(0));

        return graph;
	}
}