org.modelcc.lexer.lamb.Lamb
ModelCC is a model-based parser generator (a.k.a. compiler compiler) that decouples language specification from language processing, avoiding some of the problems caused by grammar-driven parser generators. ModelCC receives a conceptual model as input, along with constraints that annotate it. It then creates a parser for the desired textual language, and the generated parser fully automates the instantiation of the language's conceptual model. ModelCC also includes a built-in reference resolution mechanism that results in abstract syntax graphs, rather than mere abstract syntax trees.
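As a taste of the model-based approach, the following is a minimal sketch of an annotated conceptual model in the style of the published ModelCC examples; the IModel interface and the @Pattern and @Value annotations are quoted from those examples, so their exact package locations and signatures in this artifact version should be treated as assumptions:

import org.modelcc.*; // assumed location of IModel, @Pattern and @Value

// An integer literal, recognized by a regular expression; the matched
// text is mapped onto the annotated field when the model is instantiated.
@Pattern(regExp = "[0-9]+")
public class IntegerLiteral implements IModel {
    @Value
    int value;
}

A parser generated from such a model instantiates IntegerLiteral objects directly, instead of handing back a raw parse tree.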
/*
* ModelCC, distributed under ModelCC Shared Software License, www.modelcc.org
*/
package org.modelcc.lexer.lamb;
import java.io.BufferedReader;
import java.io.Reader;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.modelcc.io.file.ReaderCharSequence;
import org.modelcc.language.lexis.LexicalSpecification;
import org.modelcc.language.lexis.TokenSpecification;
import org.modelcc.lexer.LexicalGraph;
import org.modelcc.lexer.Token;
import org.modelcc.lexer.Tokenizer;
import org.modelcc.lexer.recognizer.MatchedObject;
/**
 * Lamb - Lexer with AMBiguity Support
 *
 * @author Luis Quesada ([email protected]) & Fernando Berzal ([email protected])
 */
public class Lamb extends Tokenizer implements Serializable
{
    /**
     * Input.
     */
    private ReaderCharSequence input;

    /**
     * Token list.
     */
    private List<Token> tokens;

    /**
     * Token specifications.
     */
    private List<TokenSpecification> stspecs;

    /**
     * Precedence relationships between token specifications.
     */
    private Map<TokenSpecification,Set<TokenSpecification>> precedences;

    /**
     * Constructor.
     * @param lexis lexical specification
     * @param reader input reader
     */
    public Lamb (LexicalSpecification lexis, Reader reader)
    {
        super(lexis, reader);
        input = new ReaderCharSequence(new BufferedReader(reader));
        stspecs = lexis.getTokenSpecifications();
        precedences = lexis.getPrecedences();
    }
    /**
     * Builds the token corresponding to the given token specification,
     * starting at a specific position.
     * @param ts token specification
     * @param index input string index
     * @return the token of the desired type at the given index,
     *         or null if there is no match
     */
    private Token buildToken (TokenSpecification ts, int index)
    {
        MatchedObject match = ts.getRecognizer().read(input, index);
        if (match != null) {
            Token t = new Token(ts.getType(), match.getObject(), match.getText(), index, input.lineAt(index));
            Object obj = ts.getBuilder().build(t);
            if (obj != null) {
                t.setUserData(obj);
                return t;
            }
        }
        return null;
    }
    /**
     * Performs the lexical analysis of the whole input.
     * @return the resulting lexical graph
     */
    public LexicalGraph scan ()
    {
        input.toString(); // Force reading until EOF, so that input.length() is known before scanning.
        int inputstart = 0;
        int inputend = input.length()-1;
        Search[] search = scanInput();
        // Trim skipped positions from both ends of the input.
        while (inputstart <= inputend && search[inputstart] == Search.SKIP)
            inputstart++;
        while (inputend >= inputstart && search[inputend] == Search.SKIP)
            inputend--;
        return buildLexicalGraph(inputstart, inputend, search);
    }
    // Scanning
    // --------

    /**
     * Search state enumeration.
     */
    private enum Search {
        // Search has to be performed at this position.
        OPEN,
        // Search does not have to be performed at this position.
        CLOSED,
        // Position is skipped and ignored when linking tokens.
        SKIP
    }
    private Search[] scanInput ()
    {
        int i, k;
        int start, end; // Start and end positions of a token.
        Token t;        // Current token.
        Set<TokenSpecification> pset; // Precedence set.
        ArrayList<Token> currentTokens = new ArrayList<Token>();

        // Token specifications forbidden by precedence at each position.
        Set<TokenSpecification>[] forbidden = new Set[input.length()+1];
        for (i = 0; i < forbidden.length; i++)
            forbidden[i] = new HashSet<TokenSpecification>();

        // Determine whether search must be performed starting at each input string index.
        Search search[] = new Search[input.length()+1];
        for (i = 0; i < search.length; i++)
            search[i] = Search.CLOSED;
        search[0] = Search.OPEN;
        tokens = new ArrayList<Token>();

        for (i = 0; i < input.length(); i++) {
            if (search[i] == Search.OPEN) {
                // Try every token specification at this position. (The body of
                // this loop is partly a reconstruction: the placement of the
                // precedence check, and how ignorable matches mark positions
                // as Search.SKIP, are assumptions.)
                for (TokenSpecification ts : stspecs) {
                    t = buildToken(ts, i);
                    if (t != null && !forbidden[i].contains(ts)) {
                        currentTokens.add(t);
                        start = t.getStartIndex();
                        end = t.getEndIndex();
                        if (start > end)
                            end = start;
                        // Open the search at the position right after this token.
                        if (end+1 < input.length())
                            if (search[end+1] == Search.CLOSED)
                                search[end+1] = Search.OPEN;
                        // Forbid lower-precedence specifications at every
                        // position this token covers.
                        for (k = start; k <= end; k++) {
                            pset = precedences.get(ts);
                            if (pset != null)
                                forbidden[k].addAll(pset);
                        }
                    }
                }
            }
            if (search[i] != Search.SKIP)
                tokens.addAll(currentTokens);
            currentTokens.clear();
        }
        return search;
    }
    // Lexical graph generation
    // ------------------------

    private LexicalGraph buildLexicalGraph (int inputStart, int inputEnd, Search[] search)
    {
        int i, j;
        Token ti, tj;  // Tokens.
        int state;     // Scan state.
        int minend;    // Minimum end position.
        LexicalGraph graph = new LexicalGraph(tokens, inputStart, inputEnd);

        // Link each token to the tokens that may immediately follow it.
        for (i = tokens.size()-1; i >= 0; i--) {
            ti = tokens.get(i);
            state = 0;
            minend = input.length()+1;
            for (j = i+1; j < tokens.size() && state != 2; j++) {
                tj = tokens.get(j);
                switch (state) {
                    case 0:
                        if (tj.getStartIndex() > ti.getEndIndex())
                            state = 1;
                        // no break!
                    case 1:
                        if (tj.getStartIndex() > ti.getEndIndex()) {
                            if (tj.getStartIndex() > minend) {
                                state = 2;
                            } else {
                                minend = Math.min(minend, tj.getEndIndex());
                                // ti and tj are linked only if every position
                                // between them is skipped.
                                boolean consecutive = true;
                                for (int f = ti.getEndIndex()+1; f < tj.getStartIndex(); f++) {
                                    if (search[f] != Search.SKIP)
                                        consecutive = false;
                                }
                                if (consecutive)
                                    graph.link(ti, tj);
                            }
                        }
                }
            }
        }

        // Start token set: tokens with no preceding token.
        for (Token t : tokens) {
            if (graph.getPreceding(t) == null)
                graph.addStartToken(t);
        }
        return graph;
    }
    // Tokenizer interface
    // -------------------

    private int tokenIndex = 0;

    @Override
    public Token nextToken()
    {
        if (tokens == null)
            scan();
        // Return the next token in the list, or null once it is exhausted.
        if (tokenIndex < tokens.size())
            return tokens.get(tokenIndex++);
        return null;
    }
}
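For reference, this is how the lexer might be driven once a LexicalSpecification has been obtained (a sketch only; how the specification itself is built is outside the scope of this class, and the input text is illustrative):

LexicalSpecification lexis = ...; // derived from an annotated conceptual model
Lamb lamb = new Lamb(lexis, new java.io.StringReader("input text"));
LexicalGraph graph = lamb.scan(); // whole-input analysis into a lexical graph
Token token = lamb.nextToken();   // or sequential token-by-token access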