All downloads are free. The search and download functionality uses the official Maven repository.

nl.rrd.wool.parser.WoolBodyTokenizer Maven / Gradle / Ivy

Go to download

WOOL is a simple, powerful dialogue framework for creating virtual agent conversations.

The newest version!
/*
 * Copyright 2019 Roessingh Research and Development.
 * 
 * Permission is hereby granted, free of charge, to any person obtaining a 
 * copy of this software and associated documentation files (the "Software"), 
 * to deal in the Software without restriction, including without limitation 
 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 
 * and/or sell copies of the Software, and to permit persons to whom the 
 * Software is furnished to do so, subject to the following conditions:
 * 
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
 * DEALINGS IN THE SOFTWARE.
 */

package nl.rrd.wool.parser;

import java.util.ArrayList;
import java.util.List;

import nl.rrd.wool.exception.LineNumberParseException;
import nl.rrd.wool.model.WoolVariableString;
import nl.rrd.wool.utils.ReferenceParameter;

/**
 * Splits the lines of a dialogue body into {@link WoolBodyToken
 * WoolBodyToken}s. The tokenizer recognises the following constructs:
 *
 * <ul>
 * <li>{@code $name}: a variable</li>
 * <li>{@code <<} and {@code >>}: start and end of a command</li>
 * <li>{@code [[} and {@code ]]}: start and end of a reply (only outside a
 * command)</li>
 * <li>{@code |}: a reply separator (only inside a reply and outside a
 * command)</li>
 * <li>{@code "..."}: a quoted string (only inside a command)</li>
 * <li>{@code //}: start of a comment that runs to the end of the line</li>
 * <li>{@code \}: escape, the next character is taken literally</li>
 * </ul>
 *
 * <p>Everything else is collected into text tokens. An instance remembers
 * between calls of {@link #readBodyTokens(String, int) readBodyTokens()}
 * whether it is inside a command or a reply, so the lines of one body should
 * be passed in order to the same instance.</p>
 */
public class WoolBodyTokenizer {
	private final BodyState bodyState = new BodyState();

	/**
	 * Reads the body tokens from the specified line. The line should end with a
	 * newline (\n) character.
	 * 
	 * @param line the line with \n
	 * @param lineNum the line number (first line is 1)
	 * @return the body tokens
	 * @throws LineNumberParseException if a parsing error occurs
	 */
	public List<WoolBodyToken> readBodyTokens(String line, int lineNum)
			throws LineNumberParseException {
		List<WoolBodyToken> tokens = new ArrayList<>();
		startBodyTextBuffer(1);
		// Holds the first character of a possible two-character sequence
		// (or a backslash) until the next character has been inspected.
		// It is null while no such sequence is pending.
		StringBuilder specialBuffer = null;
		boolean foundComment = false;
		int i = 0;
		while (!foundComment && i < line.length()) {
			char c = line.charAt(i);
			if (specialBuffer == null) {
				switch(c) {
				case '$': // possible start of variable
					i = readBodyVariable(tokens, line, lineNum, i);
					break;
				case '\\':
				case '<': // possible start of <<
				case '>': // possible start of >>
				case '/': // possible start of //
					specialBuffer = new StringBuilder();
					specialBuffer.append(c);
					i++;
					break;
				case '[': // possible start of [[, only special outside command
				case ']': // possible start of ]], only special outside command
					if (bodyState.inCommand) {
						bodyState.textBuffer.append(c);
					} else {
						specialBuffer = new StringBuilder();
						specialBuffer.append(c);
					}
					i++;
					break;
				case '"': // start of quoted string, only special inside command
					if (bodyState.inCommand) {
						i = readQuotedString(tokens, line, lineNum, i);
					} else {
						bodyState.textBuffer.append(c);
						i++;
					}
					break;
				case '|': // reply separator, only special inside reply and outside command
					if (bodyState.inReply && !bodyState.inCommand) {
						finishTextToken(tokens, line, lineNum, i);
						finishReplySeparator(tokens, lineNum, i + 1);
						startBodyTextBuffer(i + 2);
					} else {
						bodyState.textBuffer.append(c);
					}
					i++;
					break;
				default:
					bodyState.textBuffer.append(c);
					i++;
				}
			} else {
				// The previous character is pending in specialBuffer. If the
				// current character completes a two-character sequence, it is
				// consumed here (i++). Otherwise the pending character is
				// flushed as plain text and i is left unchanged, so that the
				// current character is processed again as a normal character.
				char specialStart = specialBuffer.charAt(0);
				switch (specialStart) {
				case '\\':
					// escaped character: always literal text
					bodyState.textBuffer.append(c);
					specialBuffer = null;
					i++;
					break;
				case '<':
					if (c == '<') {
						// i is the index of the second '<', which equals the
						// 1-based column of the first '<'
						finishTextToken(tokens, line, lineNum, i - 1);
						finishCommandStart(tokens, lineNum, i);
						startBodyTextBuffer(i + 2);
						i++;
					} else {
						bodyState.textBuffer.append(specialBuffer);
					}
					specialBuffer = null;
					break;
				case '>':
					if (c == '>') {
						finishTextToken(tokens, line, lineNum, i - 1);
						finishCommandEnd(tokens, lineNum, i);
						startBodyTextBuffer(i + 2);
						i++;
					} else {
						bodyState.textBuffer.append(specialBuffer);
					}
					specialBuffer = null;
					break;
				case '[':
					if (c == '[') {
						finishTextToken(tokens, line, lineNum, i - 1);
						finishReplyStart(tokens, lineNum, i);
						startBodyTextBuffer(i + 2);
						i++;
					} else {
						bodyState.textBuffer.append(specialBuffer);
					}
					specialBuffer = null;
					break;
				case ']':
					if (c == ']') {
						finishTextToken(tokens, line, lineNum, i - 1);
						finishReplyEnd(tokens, lineNum, i);
						startBodyTextBuffer(i + 2);
						i++;
					} else {
						bodyState.textBuffer.append(specialBuffer);
					}
					specialBuffer = null;
					break;
				case '/':
					if (c == '/') {
						// comment: stop scanning, the rest of the line is
						// not tokenized
						foundComment = true;
					} else {
						bodyState.textBuffer.append(specialBuffer);
					}
					specialBuffer = null;
					break;
				}
			}
		}
		if (specialBuffer != null) {
			// The line ended while a character was still pending.
			char specialStart = specialBuffer.charAt(0);
			switch (specialStart) {
			case '\\':
				// a trailing backslash with nothing to escape is dropped
				break;
			case '<':
			case '>':
			case '[':
			case ']':
			case '/':
				bodyState.textBuffer.append(specialStart);
				break;
			}
		}
		// NOTE(review): if scanning stopped at a // comment, the raw text of
		// the final text token still runs to the end of the line and so
		// includes the comment, while the token value does not. Confirm
		// whether that is intended.
		finishTextToken(tokens, line, lineNum, line.length());
		return tokens;
	}

	/**
	 * Tries to read a variable at a '$' character. If a valid variable name
	 * follows, the current text token is finished and a VARIABLE token is
	 * added. Otherwise the '$' is added to the current text buffer.
	 *
	 * @param tokens the token list to which tokens are added
	 * @param line the current line
	 * @param lineNum the line number (first line is 1)
	 * @param start the index of the '$' character in the line
	 * @return the index in the line where tokenizing should continue
	 */
	private int readBodyVariable(List<WoolBodyToken> tokens, String line,
			int lineNum, int start) {
		ReferenceParameter<Integer> end = new ReferenceParameter<>();
		String varName = readVariableName(line, start + 1, end);
		if (varName.length() == 0) {
			bodyState.textBuffer.append('$');
			return end.get();
		}
		finishTextToken(tokens, line, lineNum, start);
		WoolBodyToken token = new WoolBodyToken();
		token.setType(WoolBodyToken.Type.VARIABLE);
		token.setText(line.substring(start, end.get()));
		token.setValue(varName);
		token.setLineNum(lineNum);
		token.setColNum(start + 1);
		tokens.add(token);
		startBodyTextBuffer(end.get() + 1);
		return end.get();
	}

	/**
	 * Reads a variable name starting at the specified index. A name starts
	 * with an ASCII letter or underscore, followed by any number of ASCII
	 * letters, digits and underscores.
	 *
	 * @param line the current line
	 * @param start the index of the first character after the '$'
	 * @param end receives the index of the first character that is not part
	 * of the name. This is "start" if no valid name was found, or the line
	 * length if the name runs to the end of the line.
	 * @return the variable name or an empty string if there is no valid name
	 * at the start index
	 */
	private String readVariableName(String line, int start,
			ReferenceParameter<Integer> end) {
		for (int i = start; i < line.length(); i++) {
			char c = line.charAt(i);
			if (i == start && (c < 'A' || c > 'Z') &&
					(c < 'a' || c > 'z') && c != '_') {
				// invalid first character
				end.set(i);
				return "";
			} else if (i > start && (c < 'A' || c > 'Z') &&
					(c < 'a' || c > 'z') && (c < '0' || c > '9') && c != '_') {
				// first character after the name
				end.set(i);
				return line.substring(start, i);
			}
		}
		end.set(line.length());
		return line.substring(start);
	}

	/**
	 * Reads a quoted string at a '"' character. It finishes the current text
	 * token and adds a QUOTED_STRING token. The token text is the raw string
	 * including the quotes and the token value is the parsed
	 * {@link WoolVariableString WoolVariableString}.
	 *
	 * @param tokens the token list to which tokens are added
	 * @param line the current line
	 * @param lineNum the line number (first line is 1)
	 * @param start the index of the opening '"' character in the line
	 * @return the index of the first character after the closing quote
	 * @throws LineNumberParseException if the string is not terminated on
	 * this line
	 */
	private int readQuotedString(List<WoolBodyToken> tokens, String line,
			int lineNum, int start) throws LineNumberParseException {
		finishTextToken(tokens, line, lineNum, start);
		ReferenceParameter<Integer> end = new ReferenceParameter<>();
		WoolVariableString string = readQuotedString(line, lineNum, start, end);
		WoolBodyToken token = new WoolBodyToken();
		token.setType(WoolBodyToken.Type.QUOTED_STRING);
		token.setLineNum(lineNum);
		token.setColNum(start + 1);
		token.setText(line.substring(start, end.get()));
		token.setValue(string);
		tokens.add(token);
		startBodyTextBuffer(end.get() + 1);
		return end.get();
	}

	/**
	 * Parses the content of a quoted string into text and variable segments.
	 * A backslash is removed and the character after it is taken literally,
	 * so it can be used to escape '"', '$' and '\'. A '$' followed by a valid
	 * variable name becomes a variable segment.
	 *
	 * @param line the current line
	 * @param lineNum the line number (first line is 1)
	 * @param start the index of the opening '"' character in the line
	 * @param end receives the index of the first character after the closing
	 * quote
	 * @return the parsed string
	 * @throws LineNumberParseException if no closing quote is found before
	 * the end of the line
	 */
	private WoolVariableString readQuotedString(String line, int lineNum,
			int start, ReferenceParameter<Integer> end)
			throws LineNumberParseException {
		WoolVariableString result = new WoolVariableString();
		StringBuilder textBuffer = new StringBuilder();
		// start of the literal text in the line that has not been copied to
		// textBuffer yet
		int textStart = start + 1;
		boolean prevEscape = false;
		int i = start + 1;
		while (i < line.length()) {
			if (prevEscape) {
				// escaped character: skip it here without interpreting it,
				// it will be copied as part of the text from textStart
				prevEscape = false;
				i++;
				continue;
			}
			char c = line.charAt(i);
			switch (c) {
			case '\\':
				// copy the text before the backslash and drop the backslash
				textBuffer.append(line, textStart, i);
				textStart = i + 1;
				prevEscape = true;
				i++;
				break;
			case '$': {
				ReferenceParameter<Integer> varEnd =
						new ReferenceParameter<>();
				String varName = readVariableName(line, i + 1, varEnd);
				if (varName.length() > 0) {
					textBuffer.append(line, textStart, i);
					if (textBuffer.length() > 0) {
						result.addSegment(new WoolVariableString.TextSegment(
								textBuffer.toString()));
					}
					result.addSegment(new WoolVariableString.VariableSegment(
							varName));
					textBuffer = new StringBuilder();
					textStart = varEnd.get();
					i = textStart;
				} else {
					// not a variable: the '$' stays part of the text
					i++;
				}
				break;
			}
			case '"':
				textBuffer.append(line, textStart, i);
				if (textBuffer.length() > 0) {
					result.addSegment(new WoolVariableString.TextSegment(
							textBuffer.toString()));
				}
				end.set(i + 1);
				return result;
			default:
				i++;
			}
		}
		throw new LineNumberParseException("Quoted string not terminated",
				lineNum, start + 1);
	}
	
	/**
	 * Starts a new empty text buffer.
	 *
	 * @param colNum the column number (first column is 1) where the text
	 * starts
	 */
	private void startBodyTextBuffer(int colNum) {
		bodyState.textBuffer = new StringBuilder();
		bodyState.textStartCol = colNum;
	}
	
	/**
	 * Adds a TEXT token for the current text buffer. If the buffer is empty,
	 * this method does nothing. The token text is the raw text from the line
	 * and the token value is the content of the text buffer. This method does
	 * not reset the text buffer.
	 *
	 * @param tokens the token list to which the token is added
	 * @param line the current line
	 * @param lineNum the line number (first line is 1)
	 * @param end the index in the line where the raw text ends (exclusive)
	 */
	private void finishTextToken(List<WoolBodyToken> tokens, String line,
			int lineNum, int end) {
		String text = bodyState.textBuffer.toString();
		if (text.length() == 0) {
			return;
		}
		WoolBodyToken token = new WoolBodyToken();
		token.setType(WoolBodyToken.Type.TEXT);
		token.setText(line.substring(bodyState.textStartCol - 1, end));
		token.setValue(text);
		token.setLineNum(lineNum);
		token.setColNum(bodyState.textStartCol);
		tokens.add(token);
	}
	
	/**
	 * Adds a COMMAND_START token and marks that the tokenizer is inside a
	 * command.
	 *
	 * @param tokens the token list to which the token is added
	 * @param lineNum the line number (first line is 1)
	 * @param colNum the column number of the token (first column is 1)
	 * @throws LineNumberParseException if the tokenizer is already inside a
	 * command
	 */
	private void finishCommandStart(List<WoolBodyToken> tokens, int lineNum,
			int colNum) throws LineNumberParseException {
		if (bodyState.inCommand) {
			throw new LineNumberParseException("Found << inside <<...>>",
					lineNum, colNum);
		}
		WoolBodyToken token = new WoolBodyToken();
		token.setType(WoolBodyToken.Type.COMMAND_START);
		token.setText("<<");
		token.setLineNum(lineNum);
		token.setColNum(colNum);
		tokens.add(token);
		bodyState.inCommand = true;
	}
	
	/**
	 * Adds a COMMAND_END token and marks that the tokenizer is no longer
	 * inside a command.
	 *
	 * @param tokens the token list to which the token is added
	 * @param lineNum the line number (first line is 1)
	 * @param colNum the column number of the token (first column is 1)
	 * @throws LineNumberParseException if the tokenizer is not inside a
	 * command
	 */
	private void finishCommandEnd(List<WoolBodyToken> tokens, int lineNum,
			int colNum) throws LineNumberParseException {
		if (!bodyState.inCommand) {
			throw new LineNumberParseException("Found >> without preceding <<",
					lineNum, colNum);
		}
		WoolBodyToken token = new WoolBodyToken();
		token.setType(WoolBodyToken.Type.COMMAND_END);
		token.setText(">>");
		token.setLineNum(lineNum);
		token.setColNum(colNum);
		tokens.add(token);
		bodyState.inCommand = false;
	}
	
	/**
	 * Adds a REPLY_START token and marks that the tokenizer is inside a
	 * reply.
	 *
	 * @param tokens the token list to which the token is added
	 * @param lineNum the line number (first line is 1)
	 * @param colNum the column number of the token (first column is 1)
	 * @throws LineNumberParseException if the tokenizer is already inside a
	 * reply
	 */
	private void finishReplyStart(List<WoolBodyToken> tokens, int lineNum,
			int colNum) throws LineNumberParseException {
		if (bodyState.inReply) {
			throw new LineNumberParseException("Found [[ inside [[...]]",
					lineNum, colNum);
		}
		WoolBodyToken token = new WoolBodyToken();
		token.setType(WoolBodyToken.Type.REPLY_START);
		token.setText("[[");
		token.setLineNum(lineNum);
		token.setColNum(colNum);
		tokens.add(token);
		bodyState.inReply = true;
	}
	
	/**
	 * Adds a REPLY_END token and marks that the tokenizer is no longer inside
	 * a reply.
	 *
	 * @param tokens the token list to which the token is added
	 * @param lineNum the line number (first line is 1)
	 * @param colNum the column number of the token (first column is 1)
	 * @throws LineNumberParseException if the tokenizer is not inside a reply
	 */
	private void finishReplyEnd(List<WoolBodyToken> tokens, int lineNum,
			int colNum) throws LineNumberParseException {
		if (!bodyState.inReply) {
			throw new LineNumberParseException("Found ]] without preceding [[",
					lineNum, colNum);
		}
		WoolBodyToken token = new WoolBodyToken();
		token.setType(WoolBodyToken.Type.REPLY_END);
		token.setText("]]");
		token.setLineNum(lineNum);
		token.setColNum(colNum);
		tokens.add(token);
		bodyState.inReply = false;
	}
	
	/**
	 * Adds a REPLY_SEPARATOR token.
	 *
	 * @param tokens the token list to which the token is added
	 * @param lineNum the line number (first line is 1)
	 * @param colNum the column number of the token (first column is 1)
	 */
	private void finishReplySeparator(List<WoolBodyToken> tokens, int lineNum,
			int colNum) {
		WoolBodyToken token = new WoolBodyToken();
		token.setType(WoolBodyToken.Type.REPLY_SEPARATOR);
		token.setText("|");
		token.setLineNum(lineNum);
		token.setColNum(colNum);
		tokens.add(token);
	}

	/**
	 * The tokenizer state. The fields inCommand and inReply are kept between
	 * lines. The text buffer and its start column are reset at the start of
	 * every line and after every non-text token.
	 */
	private static class BodyState {
		// true between << and >>
		private boolean inCommand = false;
		// true between [[ and ]]
		private boolean inReply = false;
		// the text value collected so far for the current text token
		private StringBuilder textBuffer;
		// column number (first column is 1) where the current text starts
		private int textStartCol;
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy