org.modelcc.lexer.flex.Flex Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of ModelCC Show documentation
ModelCC is a model-based parser generator (a.k.a. compiler compiler) that decouples language specification from language processing, avoiding some of the problems caused by grammar-driven parser generators. ModelCC receives a conceptual model as input, along with constraints that annotate it. It is then able to create a parser for the desired textual language and the generated parser fully automates the instantiation of the language conceptual model. ModelCC also includes a built-in reference resolution mechanism that results in abstract syntax graphs, rather than mere abstract syntax trees.
The newest version!
/*
* ModelCC, distributed under ModelCC Shared Software License, www.modelcc.org
*/
package org.modelcc.lexer.flex;
import java.io.BufferedReader;
import java.io.Reader;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import org.modelcc.io.file.ReaderCharSequence;
import org.modelcc.language.lexis.LexicalSpecification;
import org.modelcc.language.lexis.TokenSpecification;
import org.modelcc.lexer.LexicalGraph;
import org.modelcc.lexer.Token;
import org.modelcc.lexer.Tokenizer;
import org.modelcc.lexer.recognizer.MatchedObject;
/**
* FLex - Fast Lexer
*
* @author Fernando Berzal ([email protected])
*/
public class Flex extends Tokenizer implements Serializable
{
/**
* Input
*/
private ReaderCharSequence input;
/**
* Token specifications
*/
private List stspecs;
/**
* Current token
*/
private Token currentToken;
/**
* Current token specification
*/
private TokenSpecification currentTS;
/**
* Current input position
*/
int currentPosition = 0;
/**
* Constructor
* @param lexis lexer specification
* @param reader input reader
*/
public Flex (LexicalSpecification lexis, Reader reader)
{
super(lexis,reader);
stspecs = lexis.getTokenSpecifications();
input = new ReaderCharSequence( new BufferedReader(reader) );
currentPosition = 0;
}
/**
* Perform lexical analysis (a.k.a. scanning).
* @return the lexical graph obtained from this tokenizer.
*/
public LexicalGraph scan ()
{
List tokens = new ArrayList();
Token token = nextToken();
while (token!=null) {
tokens.add(token);
token = nextToken();
}
return buildLexicalGraph(tokens);
}
// Tokenizer interface
// -------------------
public Token nextToken ()
{
Token token;
do {
token = nextInputToken();
} while ((token!=null) && getCurrentTokenSpecification().isSkipped());
return token;
}
public Token nextInputToken ()
{
currentToken = null;
currentTS = null;
for (TokenSpecification ts: stspecs) { // Try to match all tokens
MatchedObject match = ts.getRecognizer().read(input,currentPosition);
if (match != null) {
if ( ( currentToken==null)
|| ( currentToken.length() < match.getText().length())
|| ( lexis.precedes(ts,currentTS) ) ) {
Token t = new Token(ts.getType(),match.getObject(),match.getText(),currentPosition,input.lineAt(currentPosition));
if (build(ts,t)) {
currentTS = ts;
currentToken = t;
}
}
}
}
if (currentToken!=null) {
currentPosition = currentToken.getEndIndex() + 1;
input.prune(currentToken.getEndIndex());
}
return currentToken;
}
// Getters
// -------
public TokenSpecification getCurrentTokenSpecification ()
{
return currentTS;
}
public Token getCurrentToken ()
{
return currentToken;
}
public int getCurrentPosition ()
{
return currentPosition;
}
// Ancillary routines
// ------------------
private boolean build(TokenSpecification m,Token t)
{
Object obj = m.getBuilder().build(t);
t.setUserData(obj);
return (obj!=null);
}
// Lexical graph generation
// ------------------------
private LexicalGraph buildLexicalGraph (List tokens)
{
int inputStart = 0;
int inputEnd = input.length()-1;
LexicalGraph graph = new LexicalGraph(tokens,inputStart,inputEnd);
for (int i=1; i0)
graph.addStartToken(tokens.get(0));
return graph;
}
}