// Java implementation of Devin Cook's GOLD Parser engine
package com.creativewidgetworks.goldparser.engine;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.io.StringReader;
import java.util.Map;
import java.util.Stack;
import java.util.TreeMap;
import com.creativewidgetworks.goldparser.engine.enums.AdvanceMode;
import com.creativewidgetworks.goldparser.engine.enums.CGTRecord;
import com.creativewidgetworks.goldparser.engine.enums.EndingMode;
import com.creativewidgetworks.goldparser.engine.enums.LRActionType;
import com.creativewidgetworks.goldparser.engine.enums.ParseMessage;
import com.creativewidgetworks.goldparser.engine.enums.ParseResult;
import com.creativewidgetworks.goldparser.engine.enums.SymbolType;
import com.creativewidgetworks.goldparser.util.FormatHelper;
/**
* Parser
*
* This is the main class in the GOLD Parser Engine and is used to perform
* all duties required for parsing a source text string. This class
* contains the LALR(1) state machine code, the DFA state machine code,
* the character table (used by the DFA algorithm), and all other structures
* and methods needed to interact with the developer.
*
* Dependencies:
* @see Group
* @see GroupList
* @see FAStateList
* @see LRState
* @see Position
* @see Production
* @see ProductionList
* @see Reduction
* @see Symbol
* @see SymbolList
* @see Token
*
* Note that several class fields are marked as protected instead of private. This was done
* to avoid having to add getters used only for testing. If and when the tests are
* refactored to use reflection to probe these fields, they can once again be marked
* private.
*
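* A minimal lifecycle sketch. The loadTables/open/parse methods are protected, so this
* assumes it runs inside a subclass that exposes them (a wrapper class along the lines
* of GOLDParser typically fills this role); the grammar file name is hypothetical:
* <pre>{@code
* loadTables(new File("MyGrammar.egt"));   // compiled grammar tables
* open("2 + 3 * 4");                       // source text to parse
* ParseMessage msg;
* do {
*     msg = parse();
* } while (msg == ParseMessage.TOKEN_READ || msg == ParseMessage.REDUCTION);
* boolean accepted = (msg == ParseMessage.ACCEPT);
* }</pre>
*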
* @author Devin Cook (http://www.DevinCook.com/GOLDParser)
* @author Ralph Iden (http://www.creativewidgetworks.com), port to Java
* @version 5.0.0
*/
public class Parser {
// Standard attribute names
public static final String ABOUT = "About";
public static final String AUTHOR = "Author";
public static final String CASE_SENSITIVE = "Case Sensitive";
public static final String CHARACTER_MAPPING = "Character Mapping";
public static final String CHARACTER_SET = "Character Set";
public static final String GENERATED_BY = "Generated By";
public static final String GENERATED_DATE = "Generated Date";
public static final String NAME = "Name";
public static final String START_SYMBOL = "Start Symbol";
public static final String VERSION = "Version";
public static final String PARSER_NAME = "GOLD Parser Engine - Version ";
public static final String PARSER_VERSION = "5.0.3";
// Flag to indicate which grammar table file is being processed
protected boolean version1Format;
// Symbols recognized by the system
protected SymbolList symbolTable;
// DFA
protected FAStateList dfa;
protected CharacterSetList characterSetTable;
protected StringBuilder lookaheadBuffer;
// Productions
protected ProductionList productionTable;
// LALR
protected LRStateList lrStates;
private int currentLALR;
protected Stack<Token> stack;
// Fields for Reductions and errors
private SymbolList expectedSymbols;
protected boolean haveReduction;
private boolean trimReductions;
// Locally used fields
private boolean tablesLoaded;
private Stack<Token> inputTokens; // Tokens to be analyzed
// Input reader for the source code to parse
protected Reader source;
// Line and column information
private Position sysPosition; // Internal only, so user cannot alter values
private Position currentPosition; // Location of last read terminal
// Grammar attributes
protected Map<String, String> attributes;
// Lexical groups
private Stack<Token> groupStack;
protected GroupList groupTable;
public Parser() {
stack = new Stack<Token>();
inputTokens = new Stack<Token>();
groupStack = new Stack<Token>();
}
/*----------------------------------------------------------------------------*/
/**
* Return library name and version information
* @return version information
*/
public String about() {
return PARSER_NAME + PARSER_VERSION;
}
/*----------------------------------------------------------------------------*/
/**
* Consume/remove characters from the front of the lookahead buffer
* and adjust the value of the system Position object.
* @param count the number of characters to consume
*/
private void consumeBuffer(int count) {
if (count > 0 && count <= lookaheadBuffer.length()) {
// Adjust position
for (int i = 0; i < count; i++) {
char c = lookaheadBuffer.charAt(i);
if (c == 0x0A) {
if (sysPosition.getColumn() > 1) {
// Bare Unix EOLN (LF); a preceding CR (CRLF) has already incremented the line and reset the column
sysPosition.incrementLine();
}
} else if (c == 0x0D) {
sysPosition.incrementLine();
} else {
sysPosition.incrementColumn();
}
}
// Remove the characters
lookaheadBuffer.delete(0, count);
}
}
/*----------------------------------------------------------------------------*/
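/**
* Returns the value of a grammar attribute gathered while the parse tables were
* loaded; the single-argument form returns null when the attribute is absent, and
* the two-argument form returns the supplied default instead.
* A sketch using the standard attribute names defined above (the values shown are
* hypothetical):
* <pre>{@code
* String grammar = parser.getAttribute(Parser.NAME);              // e.g. "My Grammar"
* String author  = parser.getAttribute(Parser.AUTHOR, "unknown"); // default when absent
* }</pre>
*/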
public String getAttribute(String name) {
return getAttribute(name, null);
}
public String getAttribute(String name, String defaultValue) {
if (attributes == null) {
attributes = new TreeMap<String, String>();
}
String value = attributes.get(name);
return value == null ? defaultValue : value;
}
public void setAttribute(String name, String value) {
if (attributes == null) {
attributes = new TreeMap<String, String>();
}
attributes.put(name, value);
}
/*----------------------------------------------------------------------------*/
public Position getCurrentPosition() {
return currentPosition;
}
/*----------------------------------------------------------------------------*/
/**
* Return the last token read by the parser
* @return the last token read by the parser.
*/
protected Token getCurrentToken() {
return inputTokens.peek();
}
/*----------------------------------------------------------------------------*/
/**
* When parse() returns ParseMessage.REDUCTION, this method returns the current reduction.
* @return the reduction
*/
public Reduction getCurrentReduction() {
return haveReduction ? stack.peek().asReduction() : null;
}
protected void setCurrentReduction(Reduction reduction) {
if (haveReduction) {
stack.peek().setData(reduction);
}
}
/*----------------------------------------------------------------------------*/
/**
* Return the list of expected symbols
* @return SymbolList of expected symbols
*/
public SymbolList getExpectedSymbols() {
return expectedSymbols;
}
/*----------------------------------------------------------------------------*/
/**
* Searches the symbol table for the first occurrence of the
* specified symbol type.
* @param type the SymbolType to find
* @return Symbol, the first symbol of the type or null if no symbol was found
*/
private Symbol getFirstSymbolOfType(SymbolType type) {
for (Symbol symbol : symbolTable) {
if (symbol.getType().equals(type)) {
return symbol;
}
}
return null;
}
/*----------------------------------------------------------------------------*/
/**
* Searches the symbol table for the symbol with the specified name.
* @param name the name of the symbol to find
* @return Symbol, the symbol or null if no symbol was found
*/
protected Symbol getSymbolByName(String name) {
if (symbolTable != null) {
for (Symbol symbol : symbolTable) {
if (symbol.getName().equals(name)) {
return symbol;
}
}
}
return null;
}
/*----------------------------------------------------------------------------*/
/**
* Return the parser's current position (row, column)
* @return Position
*/
public Position getPosition() {
return currentPosition;
}
/*----------------------------------------------------------------------------*/
protected Reader getSource() {
return source;
}
/*----------------------------------------------------------------------------*/
protected boolean isVersion1Format() {
return version1Format;
}
protected void setVersion1Format(boolean version1) {
version1Format = version1;
}
/*----------------------------------------------------------------------------*/
/**
* Loads the parse tables from the specified file.
* NOTE: Only CGT version 5.0 is supported.
* @param file the file to open and load.
* @return true if the file was successfully processed.
* @throws IOException if the file is missing or cannot be read.
*/
protected boolean loadTables(File file) throws IOException {
if (file == null) {
throw new IOException(FormatHelper.formatMessage("messages", "error.cgt_missing"));
}
return loadTables(new FileInputStream(file));
}
/**
* Loads the parse tables from the specified input stream. The input stream will
* be closed when the method returns.
* NOTE: Only CGT version 5.0 is supported.
* @param input the stream to read.
* @return true if the stream was successfully processed.
* @throws IOException if the stream cannot be read.
*/
protected boolean loadTables(InputStream input) throws IOException {
boolean result = true;
int index;
CGT cgt = new CGT();
CharacterSet characterSet;
try {
cgt.open(input);
restart();
tablesLoaded = false;
while (result) {
cgt.getNextRecord();
if (cgt.atEOF()) {
break;
}
int recordType = cgt.retrieveByte();
// System.out.println(CGTRecord.getCGTRecord(recordType));
switch (CGTRecord.getCGTRecord(recordType)) {
case PARAMETER:
version1Format = true;
setAttribute(NAME, cgt.retrieveString());
setAttribute(VERSION, cgt.retrieveString());
setAttribute(AUTHOR, cgt.retrieveString());
setAttribute(ABOUT, cgt.retrieveString());
setAttribute(CASE_SENSITIVE, Boolean.toString(cgt.retrieveBoolean()));
setAttribute(START_SYMBOL, Integer.toString(cgt.retrieveInteger()));
break;
case PROPERTY:
// Index (not used), name, value
version1Format = false;
cgt.retrieveInteger(); // Index (not used)
setAttribute(cgt.retrieveString(), cgt.retrieveString());
break;
// Counts for Symbols, Rules, DFA, and LALR lists
case COUNTS:
case COUNTS5:
symbolTable = new SymbolList(cgt.retrieveInteger());
characterSetTable = new CharacterSetList(cgt.retrieveInteger());
productionTable = new ProductionList(cgt.retrieveInteger());
dfa = new FAStateList(cgt.retrieveInteger());
lrStates = new LRStateList(cgt.retrieveInteger());
if (!isVersion1Format()) {
groupTable = new GroupList(cgt.retrieveInteger());
} else {
// Create an empty table in case GROUP_START/END pairs follow
groupTable = new GroupList();
}
break;
// Character set
case CHARSET:
index = cgt.retrieveInteger();
characterSet = new CharacterSet();
characterSetTable.set(index, characterSet);
characterSet.add(new CharacterRange(cgt.retrieveString()));
break;
// Character range
case CHARRANGES:
index = cgt.retrieveInteger();
cgt.retrieveInteger(); // codepage
cgt.retrieveInteger(); // total sets
cgt.retrieveEntry(); // reserved
characterSet = new CharacterSet();
characterSetTable.set(index, characterSet);
while (!cgt.isRecordComplete()) {
characterSet.add(new CharacterRange(cgt.retrieveInteger(), cgt.retrieveInteger()));
}
break;
// Symbols
case SYMBOL:
index = cgt.retrieveInteger();
String name = cgt.retrieveString();
SymbolType type = SymbolType.getSymbolType(cgt.retrieveInteger());
Symbol symbol = new Symbol(name, type, index);
symbolTable.set(index, symbol);
break;
// Rules (productions)
case RULE:
index = cgt.retrieveInteger();
int headIndex = cgt.retrieveInteger();
cgt.retrieveEntry(); // Reserved
Production production = new Production(symbolTable.get(headIndex), index);
productionTable.set(index, production);
while (!cgt.isRecordComplete()) {
int symIndex = cgt.retrieveInteger();
production.getHandle().add(symbolTable.get(symIndex));
}
break;
// Initial states for DFA and LALR
case INITIALSTATES:
dfa.setInitialState(cgt.retrieveInteger());
lrStates.setInitialState(cgt.retrieveInteger());
break;
// Groups
case GROUP:
index = cgt.retrieveInteger();
Group group = new Group();
group.setName(cgt.retrieveString());
group.setContainer(symbolTable.get(cgt.retrieveInteger()));
group.setStart(symbolTable.get(cgt.retrieveInteger()));
group.setEnd(symbolTable.get(cgt.retrieveInteger()));
group.setAdvanceMode(AdvanceMode.getAdvanceMode(cgt.retrieveInteger()));
group.setEndingMode(EndingMode.getEndingMode(cgt.retrieveInteger()));
cgt.retrieveEntry(); // Reserved
// Nesting levels
int count = cgt.retrieveInteger();
for (int i = 0; i < count; i++) {
group.getNesting().add(cgt.retrieveInteger());
}
// Link back
group.getContainer().setGroup(group);
group.getStart().setGroup(group);
group.getEnd().setGroup(group);
groupTable.set(index, group);
break;
case GROUPNESTING:
break;
case DFASTATE:
index = cgt.retrieveInteger();
boolean accept = cgt.retrieveBoolean();
int acceptIndex = cgt.retrieveInteger();
cgt.retrieveEntry(); // Reserved
if (accept) {
dfa.set(index, new FAState(symbolTable.get(acceptIndex)));
} else {
dfa.set(index, new FAState());
}
while (!cgt.isRecordComplete()) {
int setIndex = cgt.retrieveInteger();
int target = cgt.retrieveInteger();
cgt.retrieveEntry(); // Reserved
dfa.get(index).getEdges().add(new FAEdge(characterSetTable.get(setIndex), target));
}
break;
case LRSTATE:
index = cgt.retrieveInteger();
cgt.retrieveEntry(); // Reserved
LRState lrState = new LRState();
lrStates.set(index, lrState);
while (!cgt.isRecordComplete()) {
symbol = symbolTable.get(cgt.retrieveInteger());
LRActionType actionType = LRActionType.getLRActionType(cgt.retrieveInteger());
int value = cgt.retrieveInteger();
cgt.retrieveEntry(); // Reserved
lrState.add(new LRAction(symbol,actionType, value));
}
break;
case UNDEFINED:
throw new IOException("Unknown record type of " + recordType + " was read.");
}
}
} finally {
cgt.close();
}
tablesLoaded = result;
resolveCommentGroupsForVersion1Grammars();
return result;
}
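/*
* Sketch: loading compiled grammar tables. Either overload above may be used; the
* resource name below is hypothetical and, because loadTables is protected, the
* calls are assumed to run inside a subclass:
*
*   InputStream in = getClass().getResourceAsStream("/grammars/MyGrammar.egt");
*   boolean loaded = loadTables(in); // the stream is closed before the method returns
*/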
/*----------------------------------------------------------------------------*/
/**
* Return a single character at charIndex. This method will read and fill the
* buffer as needed from the source stream.
* @param charIndex 1-based offset into the lookahead buffer.
* @return the character that was read, or "" if EOF has been reached.
*/
private String lookahead(int charIndex) {
if (charIndex >= 0) {
if (charIndex > lookaheadBuffer.length()) {
// Requesting data past the end of stream, so perform a read
int readCount = charIndex - lookaheadBuffer.length();
for (int i = 0; i < readCount; i++) {
int c;
try {
c = source.read();
} catch (IOException ioe) {
c = -1;
}
if (c != -1) {
lookaheadBuffer.append((char)c);
} else {
break; // EOF reached
}
}
}
// If the buffer is still smaller than charIndex, we have reached
// the end of the text. In this case, return an empty string - the DFA
// code will understand.
return (charIndex <= lookaheadBuffer.length()) ? String.valueOf(lookaheadBuffer.charAt(charIndex - 1)) : "";
}
return "";
}
/*----------------------------------------------------------------------------*/
/**
* Return count characters from the lookahead buffer.
* These characters are used to create the text stored in a token. Because of
* the design of the DFA algorithm, count should never exceed the buffer length.
* @param count number of characters to return
* @return String
*/
private String getLookaheadBuffer(int count) {
if (count > lookaheadBuffer.length()) {
count = lookaheadBuffer.length();
}
return count > 0 ? lookaheadBuffer.substring(0, count) : "";
}
/*----------------------------------------------------------------------------*/
/**
* This method implements the DFA for the parser's lexer. A
* token is generated which is used by the LALR state machine.
* @return Token
*/
private Token lookaheadDFA() {
Token token = new Token();
int currentDFA = dfa.getInitialState();
int curPosition = 1; // Next byte in the input stream
int lastAcceptState = -1; // Nothing has been accepted yet
int lastAcceptPosition = -1;
int target = 0;
String str = lookahead(1);
if (str.length() > 0) {
boolean found;
boolean done = false;
while (!done) {
// Search all the branches of the current DFA state for the next
// character in the input stream. If found, the target state is returned.
str = lookahead(curPosition);
if (str.length() == 0) {
found = false;
} else {
found = false;
for (int i = 0; !found && i < dfa.get(currentDFA).getEdges().size(); i++) {
FAEdge edge = dfa.get(currentDFA).getEdges().get(i);
if (edge.getChars().contains(str.charAt(0))) {
found = true;
target = edge.getTarget();
}
}
}
// This block checks whether an edge was found from the current state. If so, the
// state and current position advance. Otherwise it is time to exit the main loop
// and report the token found (if there was one). If lastAcceptState is -1, then
// we never found a match and the error token is created. Otherwise, a new token is
// created using the symbol in the accept state and all the characters that comprise it.
if (found) {
// This code checks whether the target state accepts a token. If so, it sets
// the appropriate variables so when the algorithm is done, it can return the
// proper token and number of characters.
if (dfa.get(target).getAccept() != null) {
lastAcceptState = target;
lastAcceptPosition = curPosition;
}
currentDFA = target;
curPosition++;
} else {
// No edge found
done = true;
if (lastAcceptState == -1) {
// Lexer doesn't recognize the symbol
token.setSymbol(getFirstSymbolOfType(SymbolType.ERROR));
token.setData(getLookaheadBuffer(1));
} else {
// Create Token and read characters
// Data contains the total number of accept characters
token.setSymbol(dfa.get(lastAcceptState).getAccept());
token.setData(getLookaheadBuffer(lastAcceptPosition));
}
}
}
} else {
token.setData("");
token.setSymbol(getFirstSymbolOfType(SymbolType.END));
}
token.setPosition(new Position(sysPosition));
return token;
}
/*----------------------------------------------------------------------------*/
/**
* Open a file to be parsed
* @param sourceFile the file to be parsed
* @return true if the file is ready to be parsed
* @throws IOException if the file is not available.
*/
protected boolean open(File sourceFile) throws IOException {
return open(new FileReader(sourceFile));
}
/**
* Prepare the parser to process the source contained in the String.
* @param sourceStatements the source code to parse
* @return true if the source is ready to be parsed.
*/
protected boolean open(String sourceStatements) {
return open(new StringReader(sourceStatements));
}
/**
* Open the reader to be parsed
* @param reader the reader that supplies the source code to parse.
* @return true if the reader is ready to be parsed.
*/
protected boolean open(Reader reader) {
restart();
source = reader;
stack.push(new Token());
return true;
}
/*----------------------------------------------------------------------------*/
/**
* Returns the next token in the stream. This method can be overridden to support
* virtual terminals (indentation-sensitive grammars, etc.); see the sketch that follows.
*/
protected Token nextToken() {
return produceToken();
}
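/*
* Sketch of the override hook mentioned above: a hypothetical subclass that injects
* queued virtual terminals (e.g. INDENT/DEDENT tokens for an indentation-sensitive
* grammar) before falling back to the normal lexer:
*
*   @Override
*   protected Token nextToken() {
*       Token virtual = pendingVirtualTokens.poll(); // hypothetical queue maintained by the subclass
*       return virtual != null ? virtual : produceToken();
*   }
*/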
/**
* Performs a parse action on the input stream. This method is typically used in a loop until
* either the grammar is accepted or an error occurs.
* @return ParseMessage
*/
protected ParseMessage parse() {
if (!tablesLoaded) {
return ParseMessage.NOT_LOADED_ERROR;
}
Token read;
ParseMessage parseMessage = ParseMessage.UNDEFINED;
// Loop until a breakable event
boolean done = false;
while (!done) {
if (inputTokens.size() == 0) {
read = nextToken();
inputTokens.push(read);
// Handle the case where an unterminated comment block consumes the entire program
if (SymbolType.END.equals(read.getType()) && groupStack.size() > 0) {
// Runaway group
parseMessage = ParseMessage.GROUP_ERROR;
} else {
// A good token was read
parseMessage = ParseMessage.TOKEN_READ;
}
done = true;
} else {
read = inputTokens.peek();
currentPosition.set(read.getPosition()); // Update current position
if (SymbolType.NOISE.equals(read.getType())) {
// Discard token - these tokens were already reported to the user
inputTokens.pop();
} else if (SymbolType.ERROR.equals(read.getType())) {
parseMessage = ParseMessage.LEXICAL_ERROR;
done = true;
} else if (SymbolType.END.equals(read.getType()) && groupStack.size() > 0) {
// Runaway group
parseMessage = ParseMessage.GROUP_ERROR;
done = true;
} else {
ParseResult parseResult = parseLALR(read); // Same method as v1
switch (parseResult) {
case ACCEPT:
parseMessage = ParseMessage.ACCEPT;
done = true;
break;
case INTERNAL_ERROR:
parseMessage = ParseMessage.INTERNAL_ERROR;
done = true;
break;
case REDUCE_NORMAL:
parseMessage = ParseMessage.REDUCTION;
done = true;
break;
case SHIFT:
// parseLALR() shifted the token at the front of the input queue. It now
// exists on the parse stack and must be removed from the queue.
inputTokens.remove(0);
break;
case SYNTAX_ERROR:
parseMessage = ParseMessage.SYNTAX_ERROR;
done = true;
break;
case REDUCE_ELIMINATED: // fall through intended
case UNDEFINED:
// do nothing
break;
}
}
}
}
return parseMessage;
}
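/*
* Sketch of a caller reacting to the messages returned by parse(). Since parse() is
* protected, the loop is assumed to live in a subclass or wrapper; handleReduction()
* and reportError() are hypothetical callbacks:
*
*   boolean done = false;
*   while (!done) {
*       switch (parse()) {
*           case TOKEN_READ:   break;                                        // a terminal was read
*           case REDUCTION:    handleReduction(getCurrentReduction()); break;
*           case ACCEPT:       done = true; break;                           // parse complete
*           case SYNTAX_ERROR: reportError(getExpectedSymbols()); done = true; break;
*           default:           done = true; break;                           // lexical, group, internal, or not-loaded error
*       }
*   }
*/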
/*----------------------------------------------------------------------------*/
/**
* This method analyzes a token and either:
* 1. Makes a SINGLE reduction and pushes a complete Reduction object on the stack
* 2. Accepts the token and shifts
* 3. Reports a syntax error and populates the list of expected symbols.
*
* @param nextToken to be analyzed
* @return ParseResult
*/
private ParseResult parseLALR(Token nextToken) {
ParseResult parseResult = null;
haveReduction = false;
LRAction parseAction = lrStates.get(currentLALR).find(nextToken);
//System.out.println("Action: " + parseAction.toString());
switch (parseAction.getType()) {
case ACCEPT:
haveReduction = true;
parseResult = ParseResult.ACCEPT;
break;
case REDUCE:
// Produce a reduction - remove as many tokens as members in the rule and push a nonterminal token
Production production = productionTable.get(parseAction.getValue());
Token head;
if (trimReductions && production.containsOneNonTerminal()) {
// The current rule only consists of a single nonterminal and can be trimmed from the
// parse tree. Usually we create a new Reduction, assign it as the data of the head
// token, and push it on the stack. In this case, however, the head simply takes on
// the data of the reduced token (the only one on the stack). To save code, the token
// popped off the stack is changed into the head.
head = stack.pop();
head.setSymbol(production.getHead());
parseResult = ParseResult.REDUCE_ELIMINATED;
} else {
haveReduction = true;
Reduction newReduction = new Reduction(production.getHandle().size());
newReduction.setParent(production);
for (int i = production.getHandle().size() - 1; i >= 0; i--) {
newReduction.set(i, stack.pop());
}
head = new Token(production.getHead(), newReduction);
parseResult = ParseResult.REDUCE_NORMAL;
}
// Perform the LALR goto using the state of the token now on top of the stack
int index = stack.peek().getState();
LRAction lrAction = lrStates.get(index).find(production.getHead());
if (!lrAction.equals(LRState.LRACTION_UNDEFINED)) {
currentLALR = lrAction.getValue();
head.setState(currentLALR);
stack.push(head);
} else {
parseResult = ParseResult.INTERNAL_ERROR;
}
break;
case SHIFT:
currentLALR = parseAction.getValue();
nextToken.setState(currentLALR);
stack.push(nextToken);
parseResult = ParseResult.SHIFT;
break;
case ERROR: // fall-through intended
case GOTO: // fall-through intended
case UNDEFINED:
// Syntax error - produce a list of expected symbols to report
expectedSymbols.clear();
for (LRAction action : lrStates.get(currentLALR)) {
SymbolType type = action.getSymbol().getType();
switch (type) {
case CONTENT: // fall-through intended
case END: // fall-through intended
expectedSymbols.add(action.getSymbol());
break;
case GROUP_START: // fall-through intended
case GROUP_END: // fall-through intended
case COMMENT_LINE:
expectedSymbols.add(action.getSymbol());
break;
case ERROR:
case NOISE:
case NON_TERMINAL:
case UNDEFINED:
// do nothing
break;
}
}
parseResult = ParseResult.SYNTAX_ERROR;
break;
}
return parseResult;
}
/*----------------------------------------------------------------------------*/
/**
* This method creates a token and also takes into account the current
* lexing mode of the parser. In particular, it contains the group logic.
*
* A stack is used to track the current "group"; this replaces the comment
* level counter. Text is appended to the token on the top of the stack, which
* allows the group text to be returned in one chunk.
* @return Token
*/
protected Token produceToken() {
Token token = null;
boolean nestGroup = false;
boolean done = false;
while (!done) {
Token read = lookaheadDFA();
// Groups (comments, etc.)
// Determining whether a group should be nested requires looking at both the token
// on top of the stack and the symbol's linked group, either of which can be unset.
// This section therefore sets a boolean up front, which the logic chain below uses.
if (read.getType().equals(SymbolType.GROUP_START) || read.getType().equals(SymbolType.COMMENT_LINE)) {
if (groupStack.size() == 0) {
nestGroup = true;
} else {
nestGroup = groupStack.peek().getGroup().getNesting().contains(read.getGroup().getIndex());
}
} else {
nestGroup = false;
}
// Logic chain
if (nestGroup) {
consumeBuffer(read.asString().length());
// fix up the comment block
if (read.getData() != null) {
read.appendData(read.getData().toString());
read.setData(null);
}
groupStack.push(read);
} else if (groupStack.size() == 0) {
// The token is ready to be analyzed
consumeBuffer(read.asString().length());
token = read;
done = true;
} else if (groupStack.peek().getGroup().getEnd().getTableIndex() == read.getTableIndex()) {
// End the current group
Token pop = groupStack.pop();
// Ending logic
if (pop.getGroup().getEndingMode() == EndingMode.CLOSED) {
pop.appendData(read.asString());
consumeBuffer(read.asString().length());
}
if (groupStack.size() == 0) {
// We are out of the group. Return the popped token, which contains all the group text
pop.setSymbol(pop.getGroup().getContainer());
token = pop;
done = true;
} else {
// Append group text to parent
groupStack.peek().appendData(pop.asString());
}
} else if (read.getType().equals(SymbolType.END)) {
// EOF always stops the loop. The caller method (parse) can flag a runaway group error.
token = read;
done = true;
} else {
// We are in a group; append to the token on the top of the stack,
// taking the group's advance mode into account.
Token top = groupStack.peek();
if (top.getGroup().getAdvanceMode() == AdvanceMode.TOKEN) {
// Append all text
top.appendData(read.asString());
consumeBuffer(read.asString().length());
} else {
// Append one character
top.appendData(read.asString().substring(0, 1));
consumeBuffer(1);
}
}
}
return token;
}
/*----------------------------------------------------------------------------*/
/**
* Inserts Group objects into the group table so comments can be processed in a
* grammar. It is assumed that version 1.0 files have at most one closed
* comment block and one comment line symbol.
*/
private void resolveCommentGroupsForVersion1Grammars() {
if (isVersion1Format()) {
Group group;
Symbol symbolStart = null;
Symbol symbolEnd = null;
// Create a new COMMENT_LINE group
for (Symbol currentStartSymbol : symbolTable) {
if (currentStartSymbol.getType().equals(SymbolType.COMMENT_LINE)) {
symbolStart = currentStartSymbol;
group = new Group();
group.setName("Comment Line");
group.setContainer(symbolTable.findByName(SymbolList.SYMBOL_COMMENT));
group.setStart(symbolStart);
group.setEnd(symbolTable.findByName("NewLine"));
group.setAdvanceMode(AdvanceMode.TOKEN);
group.setEndingMode(EndingMode.OPEN);
groupTable.add(group);
symbolStart.setGroup(group);
break;
}
}
// Create a new COMMENT_BLOCK group
for (Symbol currentStartSymbol : symbolTable) {
if (currentStartSymbol.getType().equals(SymbolType.GROUP_START)) {
symbolStart = symbolEnd = currentStartSymbol;
for (Symbol currentEndSymbol : symbolTable) {
if (currentEndSymbol.getType().equals(SymbolType.GROUP_END)) {
symbolEnd = currentEndSymbol;
break;
}
}
group = new Group();
group.setName("Comment Block");
group.setContainer(symbolTable.findByName(SymbolList.SYMBOL_COMMENT));
group.setStart(symbolStart);
group.setEnd(symbolEnd);
group.setAdvanceMode(AdvanceMode.TOKEN);
group.setEndingMode(EndingMode.CLOSED);
groupTable.add(group);
symbolStart.setGroup(group);
symbolEnd.setGroup(group);
break;
}
}
}
}
/*----------------------------------------------------------------------------*/
/**
* Restarts the parser. The loaded tables are retained.
*/
protected void restart() {
currentLALR = LRState.INITIAL_STATE;
sysPosition = new Position(1, 1);
currentPosition = new Position(1, 1);
lookaheadBuffer = new StringBuilder();
haveReduction = false;
if (expectedSymbols == null) {
expectedSymbols = new SymbolList();
}
expectedSymbols.clear();
if (groupStack == null) {
groupStack = new Stack<Token>();
}
groupStack.clear();
if (inputTokens == null) {
inputTokens = new Stack<Token>();
}
inputTokens.clear();
if (stack == null) {
stack = new Stack<Token>();
}
stack.clear();
}
/*----------------------------------------------------------------------------*/
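/**
* Enables or disables the trimming of reductions. When enabled, a rule whose handle
* consists of a single nonterminal is not pushed as a separate Reduction; the token
* on the stack is simply relabeled with the rule's head (see parseLALR).
* @param value true to trim single-nonterminal reductions
*/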
public void setTrimReductions(boolean value) {
trimReductions = value;
}
}