All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.modelcc.lexer.lamb.Lamb Maven / Gradle / Ivy

Go to download

ModelCC is a model-based parser generator (a.k.a. compiler compiler) that decouples language specification from language processing, avoiding some of the problems caused by grammar-driven parser generators. ModelCC receives a conceptual model as input, along with constraints that annotate it. It is then able to create a parser for the desired textual language and the generated parser fully automates the instantiation of the language conceptual model. ModelCC also includes a built-in reference resolution mechanism that results in abstract syntax graphs, rather than mere abstract syntax trees.

The newest version!
/*
 * ModelCC, distributed under ModelCC Shared Software License, www.modelcc.org
 */

package org.modelcc.lexer.lamb;

import java.io.BufferedReader;
import java.io.Reader;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.modelcc.io.file.ReaderCharSequence;
import org.modelcc.language.lexis.LexicalSpecification;
import org.modelcc.language.lexis.TokenSpecification;
import org.modelcc.lexer.LexicalGraph;
import org.modelcc.lexer.Token;
import org.modelcc.lexer.Tokenizer;
import org.modelcc.lexer.recognizer.MatchedObject;

/**
 * Lamb - Lexer with AMBiguity Support
 * 
 * @author Luis Quesada ([email protected]) & Fernando Berzal ([email protected])
 */
public class Lamb extends Tokenizer implements Serializable 
{
	/** 
	 * Input character sequence wrapping the underlying Reader.
	 */
	private ReaderCharSequence input;
	    
    /**
     * Token list.
     * NOTE(review): generic type parameters throughout this file appear to
     * have been stripped by HTML extraction; presumably List<Token> —
     * confirm against the upstream ModelCC sources.
     */
    private List tokens;
    
    /**
     * Token specifications
     * (presumably List<TokenSpecification> — generics stripped, see above)
     */
	private List stspecs;
	
	/**
	 * Precedence relationships.
	 * NOTE(review): declaration garbled ("Map>") — presumably
	 * Map<TokenSpecification, Set<TokenSpecification>>; confirm upstream.
	 */
	private Map> precedences; 
    
    
	/**
	 * Constructor
	 * @param lexis lexical specification
	 * @param reader input reader
	 */
    
    public Lamb (LexicalSpecification lexis, Reader reader)
    {
    	super(lexis,reader);
    	
    	input = new ReaderCharSequence( new BufferedReader(reader) );
        stspecs = lexis.getTokenSpecifications();
        precedences = lexis.getPrecedences();
    }
    
    /**
     * Attempts to build a token of the given type starting at a specific
     * input position.
     * @param ts Token specification
     * @param index Input string index
     * @return the token of the desired type at the given index, or null when
     *         the recognizer does not match there (or the token builder
     *         rejects the match)
     */
    private Token buildToken (TokenSpecification ts, int index) 
    {
		MatchedObject match = ts.getRecognizer().read(input, index);

		if (match == null)
			return null;

		Token token = new Token(ts.getType(), match.getObject(), match.getText(),
				index, input.lineAt(index));

		// Let the token builder post-process the match; a null result vetoes it.
		Object userData = ts.getBuilder().build(token);
		if (userData == null)
			return null;

		token.setUserData(userData);
		return token;
    }


    /**
     * Performs the lexical analysis of the whole input.
     * @return the obtained lexical graph.
     */
    public LexicalGraph scan () 
    {
    	input.toString(); // Force reading until EOF, so that input.length() is known before scanning...
    	
    	int inputstart = 0;
    	int inputend = input.length()-1;
        
        Search[] search = scanInput ();

        // Trim SKIP positions from both ends of the effective input range.
        // NOTE(review): this was one garbled line in the extracted source
        // (text between '<' and '>' stripped); the surviving fragment matches
        // exactly these two trimming loops — confirm against upstream sources.
        while (inputstart<=inputend && search[inputstart]==Search.SKIP)
        	inputstart++;
        while (inputend>=inputstart && search[inputend]==Search.SKIP)
        	inputend--;

        return buildLexicalGraph(inputstart, inputend, search);
    }


    // Scanning
    // --------

    /**
     * Search state of each input position: whether a token search still has
     * to be started there.
     */
    private enum Search {
        // Search has to be performed starting at this position.
        OPEN,
        // Search is not (or no longer) scheduled for this position.
        // NOTE(review): original comment said "has not been performed", but
        // scanInput() promotes CLOSED positions back to OPEN, so CLOSED
        // appears to mean "not scheduled" — confirm against upstream sources.
        CLOSED,
        // Position is skipped entirely (trimmed in scan(); treated as an
        // allowed gap between tokens in buildLexicalGraph()).
        SKIP
    }

	/**
	 * Scans the whole input: tries every token specification at every open
	 * position, collects the recognized tokens, and computes the search
	 * state of each input position.
	 *
	 * NOTE(review): this method was damaged by HTML extraction — every run
	 * of text between '<' and '>' (generic type parameters, loop headers,
	 * and a large part of the main scanning loop, including the declaration
	 * of 'ts' used below) has been stripped, so the code does not compile
	 * as-is. Recover the original from the upstream ModelCC sources.
	 *
	 * @return per-position search states (array of length input.length()+1)
	 */
	private Search[] scanInput () 
	{
        int i,k;
        int start,end;         // Start and end positions of a token.
        Token t;               // Current token.
        Set pset; // Precedence set (presumably Set<TokenSpecification> — generics stripped)
        ArrayList currentTokens = new ArrayList();
        
        // List of elements forbidden by precedence in each position.
        Set[] forbidden = new Set[input.length()+1];
        
        // NOTE(review): loop header garbled — presumably initializes every
        // forbidden[i] to a new HashSet.
        for (i=0; i();

        // Determine if search must be performed starting at each input string index.
		Search search[] = new Search[input.length()+1];

        // NOTE(review): loop garbled — presumably initializes the search
        // array (and the stripped text also swallowed unrelated statements).
        for (i=0; i();

        // Main scanning loop. NOTE(review): the loop header and the first
        // statements of its body (including the iteration over stspecs and
        // the declaration of 'ts' and 'start'/'end') were stripped.
        for (i=0; iend)
        						end = start;
        					// Re-open the position right after this token so
        					// chained tokens can be searched from there.
        					if (end+1 < input.length())
        						if (search[end+1] == Search.CLOSED)
        							search[end+1] = Search.OPEN;
        					// Propagate precedence-forbidden specifications
        					// over every position this token covers.
        					for (k=start; k<=end; k++) {
        						pset = precedences.get(ts);
        						if (pset != null)
        							forbidden[k].addAll(pset);
        					}
        				}
        			}
        		}
        		
        		// Keep the tokens found at this position unless it is skipped.
        		if (search[i]!=Search.SKIP)
        			tokens.addAll(currentTokens);
        		
        		currentTokens.clear();
        	}
        }

        return search;
	}

    // Lexical graph generation
    // ------------------------
    
    /**
     * Builds the lexical graph: links each token to the tokens that can
     * immediately follow it (allowing SKIP-only gaps in between), and marks
     * tokens with no predecessor as start tokens.
     *
     * NOTE(review): this method was damaged by HTML extraction — the inner
     * loop header and the switch statement head (presumably
     * "for (j=i+1; j<tokens.size(); j++) { tj = tokens.get(j);
     * switch (state) { case 0:") were stripped at the line marked below.
     * Recover the original from the upstream ModelCC sources.
     *
     * @param inputStart first effective input position
     * @param inputEnd last effective input position
     * @param search per-position search states from scanInput()
     * @return the lexical graph
     */
    private LexicalGraph buildLexicalGraph(int inputStart, int inputEnd, Search[] search) 
    {
        int i,j;
        Token ti,tj; // Tokens
        int state;   // State.
        int minend;  // Minimum end position.
        
        LexicalGraph graph = new LexicalGraph(tokens,inputStart, inputEnd);

        // Link tokens.
        
        for (i=tokens.size()-1; i>=0; i--) {
        	ti = tokens.get(i);
        	state = 0;
        	minend = input.length()+1;
        	// NOTE(review): garbled line — inner loop + switch head stripped here.
        	for (j=i+1; jti.getEndIndex())
        				state = 1;
        			// no break!
        		case 1:
        			if (tj.getStartIndex()>ti.getEndIndex()) {
        				// Once a candidate starts past the earliest known end,
        				// no further successor can be adjacent: stop linking.
        				if (tj.getStartIndex()>minend) {
        					state = 2;
        				} else {
        					minend = Math.min(minend,tj.getEndIndex());
        					// Link only if every position between the two
        					// tokens is a SKIP position (no real gap).
        					boolean consecutive = true;
        					for (int f = ti.getEndIndex()+1;f < tj.getStartIndex();f++) {
        						if (search[f]!=Search.SKIP)
        							consecutive = false;
        					}
        					if (consecutive)
        						graph.link(ti,tj);
        				}
        			}
        		}
        	}
        }

        // Start token set
        
        for (Token t: tokens) {
        	if (graph.getPreceding(t) == null)
        		graph.addStartToken(t);
        }

        return graph;
	}
    
    // Tokenizer interface
    // -------------------

    // Index of the next token to hand out through the Tokenizer interface.
    private int tokenIndex = 0;
    
	/**
	 * Returns the next token, scanning the input lazily on first use.
	 *
	 * NOTE(review): the remainder of this method was lost to HTML extraction
	 * (everything after the '<' below was stripped); presumably it returns
	 * tokens.get(tokenIndex++) while tokenIndex < tokens.size() and null at
	 * end of input — recover from the upstream ModelCC sources.
	 */
	@Override
	public Token nextToken() 
	{
		if (tokens==null) 
			scan();
		
		if (tokenIndex




© 2015 - 2024 Weber Informatics LLC | Privacy Policy