/*
 * de.be4.classicalb.core.parser.PreParser (Maven / Gradle / Ivy)
 *
 * Go to download
 * Show more of this group. Show more artifacts with this name.
 * Show all versions of bparser. Show documentation.
 * Part of the ProB Parser library
 * The newest version!
 */
package de.be4.classicalb.core.parser;

import java.io.File;
import java.io.IOException;
import java.io.PushbackReader;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;

import de.be4.classicalb.core.parser.analysis.checking.DefinitionCollector;
import de.be4.classicalb.core.parser.analysis.checking.DefinitionPreCollector;
import de.be4.classicalb.core.parser.exceptions.BCompoundException;
import de.be4.classicalb.core.parser.exceptions.BException;
import de.be4.classicalb.core.parser.exceptions.BLexerException;
import de.be4.classicalb.core.parser.exceptions.PreParseException;
import de.be4.classicalb.core.parser.node.ADefinitionExpression;
import de.be4.classicalb.core.parser.node.ADefinitionPredicate;
import de.be4.classicalb.core.parser.node.ADefinitionSubstitution;
import de.be4.classicalb.core.parser.node.AExpressionParseUnit;
import de.be4.classicalb.core.parser.node.AFunctionExpression;
import de.be4.classicalb.core.parser.node.AIdentifierExpression;
import de.be4.classicalb.core.parser.node.APredicateParseUnit;
import de.be4.classicalb.core.parser.node.EOF;
import de.be4.classicalb.core.parser.node.PExpression;
import de.be4.classicalb.core.parser.node.PParseUnit;
import de.be4.classicalb.core.parser.node.TDefLiteralPredicate;
import de.be4.classicalb.core.parser.node.TDefLiteralSubstitution;
import de.be4.classicalb.core.parser.node.TIdentifierLiteral;
import de.be4.classicalb.core.parser.node.Token;
import de.be4.classicalb.core.parser.util.Utils;
import de.be4.classicalb.core.preparser.lexer.LexerException;
import de.be4.classicalb.core.preparser.node.PPreParseUnit;
import de.be4.classicalb.core.preparser.node.TPreParserDefinitions;
import de.be4.classicalb.core.preparser.node.TPreParserIdentifier;
import de.be4.classicalb.core.preparser.node.TPreParserString;
import de.be4.classicalb.core.preparser.node.TRhsBody;
import de.be4.classicalb.core.preparser.parser.Parser;
import de.be4.classicalb.core.preparser.parser.ParserException;

/**
 * 
 * Pre-parsing: find and parse any referenced definition files (.def)
 * and determine the types of all definitions.
 * This is necessary because the parser handles expressions, predicates, and substitutions separately,
 * so different token/node types are needed for definition identifiers depending on whether they are
 * expressions ({@link TIdentifierLiteral}/{@link AIdentifierExpression}),
 * predicates ({@link TDefLiteralPredicate}/{@link ADefinitionPredicate}),
 * or substitutions ({@link TDefLiteralSubstitution}/{@link ADefinitionSubstitution}).
 * The PreParser collects all needed type information into {@link DefinitionTypes},
 * which is used by {@link BLexer} to convert all identifiers to the appropriate token/node types.
 * 
 * 
 * This is an annoying mess and nobody wants it,
 * but it's more or less necessary with the current parser architecture.
 * We have already tried to avoid/remove this step,
 * but haven't succeeded so far.
 * If you try to remove the PreParser,
 * please update the following counters afterwards:
 * 
 * 
 * 1 person has tried 4 times to remove the PreParser.
 * 
 * 
 * @see BLexer#replaceDefTokens()
 * @see BParser#preParsing(Reader, File, IFileContentProvider)
 * @see DefinitionCollector
 * @see DefinitionPreCollector
 */
public class PreParser {

	private final PushbackReader pushbackReader;
	private final File modelFile;
	private final DefinitionTypes definitionTypes;
	private final IDefinitions defFileDefinitions;
	private final ParseOptions parseOptions;
	private final IFileContentProvider contentProvider;
	private final List definitionFileIncludeStack;

	private int startLine;
	private int startColumn;

	public PreParser(PushbackReader pushbackReader, File modelFile,
			IFileContentProvider contentProvider,
			List definitionFileIncludeStack,
			ParseOptions parseOptions, IDefinitions definitions) {
		this.pushbackReader = pushbackReader;
		this.modelFile = modelFile;
		this.contentProvider = contentProvider;
		this.definitionFileIncludeStack = definitionFileIncludeStack;
		this.parseOptions = parseOptions;
		this.defFileDefinitions = definitions;
		this.definitionTypes = new DefinitionTypes();
		definitionTypes.addAll(definitions.getTypes());

		this.startLine = 1;
		this.startColumn = 1;
	}

	public void setStartPosition(final int line, final int column) {
		this.startLine = line;
		this.startColumn = column;
	}

	public void parse() throws PreParseException, IOException, BCompoundException {
		final PreLexer preLexer = new PreLexer(pushbackReader);
		preLexer.setPosition(this.startLine, this.startColumn);

		final Parser preParser = new Parser(preLexer);
		PPreParseUnit preParseUnit;
		try {
			preParseUnit = preParser.parse().getPPreParseUnit();
		} catch (final ParserException e) {
			String message;
			if (e.getToken() instanceof TPreParserDefinitions) {
				message = "Clause 'DEFINITIONS' is used more than once";
			} else {
				message = e.getRealMsg();
			}
			throw new PreParseException(e.getToken(), message, e);
		} catch (final LexerException e) {
			throw new PreParseException(e.getLine(), e.getPos(), e.getRealMsg(), e);
		}

		final DefinitionPreCollector collector = new DefinitionPreCollector();
		preParseUnit.apply(collector);

		Map definitions = new HashMap<>(collector.getDefinitions());
		for (TPreParserIdentifier nameToken : definitions.keySet()) {
			String name = nameToken.getText();
			if (Utils.isQuoted(name, '`')) {
				try {
					nameToken.setText(Utils.unquoteIdentifier(name));
				} catch (IllegalArgumentException exc) {
					throw new PreParseException(nameToken, exc.getMessage(), exc);
				}
			}
		}

		evaluateDefinitionFiles(collector.getFileDefinitions());

		List sortedDefinitionList = sortDefinitionsByTopologicalOrderAndCheckForCycles(definitions);

		evaluateTypes(sortedDefinitionList, definitions);

	}

	private void evaluateDefinitionFiles(List list)
			throws PreParseException, BCompoundException {

		IDefinitionFileProvider cache = null;
		if (contentProvider instanceof IDefinitionFileProvider) {
			cache = (IDefinitionFileProvider) contentProvider;
		}

		for (TPreParserString filenameString : list) {
			// Unquote and unescape the definition file name string.
			String quotedFilename = filenameString.getText();
			String fileName = Utils.unescapeStringContents(Utils.removeSurroundingQuotes(quotedFilename, '"'));
			// Note, that the fileName could be a relative path, e.g.
			// ./foo/bar/defs.def or an absolute path
			try {
				if (definitionFileIncludeStack.contains(fileName)) {
					StringBuilder sb = new StringBuilder();
					for (String string : definitionFileIncludeStack) {
						sb.append(string).append(" -> ");
					}
					sb.append(fileName);
					throw new PreParseException(filenameString,
							"Cyclic references in definition files: " + sb);
				}

				IDefinitions definitions;
				if (cache != null && cache.getDefinitions(fileName) != null) {
					definitions = cache.getDefinitions(fileName);
				} else {
					File directory = modelFile == null ? null : modelFile.getParentFile();
					final String content = contentProvider.getFileContent(directory, fileName);
					final File file = contentProvider.getFile(directory, fileName);
					final BParser parser = new BParser(fileName, parseOptions);
					parser.setContentProvider(contentProvider);
					parser.getDefinitionFileIncludeStack().addAll(definitionFileIncludeStack);
					parser.getDefinitionFileIncludeStack().add(fileName);
					parser.setDefinitions(new Definitions(file));
					parser.parseMachine(content, file);
					definitions = parser.getDefinitions();
					if (cache != null) {
						cache.storeDefinition(fileName, definitions);
					}
				}
				defFileDefinitions.addDefinitions(definitions);
				definitionTypes.addAll(definitions.getTypes());
			} catch (final IOException e) {
				throw new PreParseException(filenameString, "Definition file cannot be read: " + e, e);
			} catch (BCompoundException e) {
				throw e.withMissingLocations(BException.Location.locationsFromNodes(fileName, Collections.singletonList(filenameString)));
			}
		}
	}

	private void evaluateTypes(List sortedDefinitionList, final Map definitions)
			throws PreParseException {
		// use linked list as we rely on pop() and push()
		LinkedList remainingDefinitions = new LinkedList<>(sortedDefinitionList);
		LinkedList currentlyUnparseableDefinitions = new LinkedList<>();
		Set todoDefs = new HashSet<>();
		for (TPreParserIdentifier token : remainingDefinitions) {
			todoDefs.add(token.getText());
		}
		// use main parser for the rhs of each definition to determine type
		// if a definition can not be typed this way, it may be due to another
		// definition that is not yet parser (because it appears later in the
		// source code)
		// in this case, the definition is appended to the list again
		// the algorithm terminates if the queue is empty or if no definition
		// has been parsed
		boolean oneParsed = true;
		while (oneParsed) {
			oneParsed = false;

			while (!remainingDefinitions.isEmpty()) {
				TPreParserIdentifier definition = remainingDefinitions.pop();

				TRhsBody defRhs = definitions.get(definition);
				Definitions.Type type;
				DefinitionType definitionType = determineType(definition, defRhs, todoDefs);
				type = definitionType.type;
				if (type != null) {
					todoDefs.remove(definition.getText());
					oneParsed = true;
					definitionTypes.addTyping(definition.getText(), type);
					// types.addTyping(definition.getText(), type);
				} else {
					currentlyUnparseableDefinitions.push(definition);
				}
			}

			remainingDefinitions.addAll(currentlyUnparseableDefinitions);
			currentlyUnparseableDefinitions.clear();
		}

		if (!remainingDefinitions.isEmpty()) {
			TPreParserIdentifier definition = remainingDefinitions.pop();
			TRhsBody defRhs = definitions.get(definition);
			DefinitionType definitionType = determineType(definition, defRhs, todoDefs);
			if (definitionType.errorMessage != null) {
				String message = definitionType.errorMessage;
				if (modelFile != null) {
					message += " in file: " + modelFile;
				}
				throw new PreParseException(definitionType.errorToken.getLine(), definitionType.errorToken.getPos(), message);
			} else {
				// fall back message
				throw new PreParseException(definition, "expecting wellformed expression, predicate or substitution as DEFINITION body (DEFINITION arguments assumed to be expressions)");
			}
		}
	}

	private List sortDefinitionsByTopologicalOrderAndCheckForCycles(Map definitions)
			throws PreParseException {
		Set definitionNames = new HashSet<>();
		Map definitionMap = new HashMap<>();
		for (TPreParserIdentifier token : definitions.keySet()) {
			final String definitionName = token.getText();
			definitionNames.add(definitionName);
			definitionMap.put(definitionName, token);
		}
		Map> dependencies = determineDependencies(definitionNames, definitions);
		List sortedDefinitionNames = Utils.sortByTopologicalOrder(dependencies);
		if (sortedDefinitionNames.size() < definitionNames.size()) {
			Set remaining = new HashSet<>(definitionNames);
			remaining.removeAll(sortedDefinitionNames);
			List cycle = Utils.determineCycle(remaining, dependencies);
			StringBuilder sb = new StringBuilder();
			for (Iterator iterator = cycle.iterator(); iterator.hasNext();) {
				sb.append(iterator.next());
				if (iterator.hasNext()) {
					sb.append(" -> ");
				}
			}
			TPreParserIdentifier firstDefinitionToken = definitionMap.get(cycle.get(0));
			throw new PreParseException(firstDefinitionToken, "Cyclic references in definitions: " + sb);
		} else {
			List sortedDefinitionTokens = new ArrayList<>();
			for (String name : sortedDefinitionNames) {
				sortedDefinitionTokens.add(definitionMap.get(name));
			}
			return sortedDefinitionTokens;
		}

	}

	private Map> determineDependencies(Set definitionNames, Map definitions)
			throws PreParseException {
		HashMap> dependencies = new HashMap<>();
		for (Map.Entry entry : definitions.entrySet()) {
			TPreParserIdentifier nameToken = entry.getKey();
			TRhsBody rhsToken = entry.getValue();
			// The FORMULA_PREFIX is needed to switch the lexer state from
			// section to normal. Note, that we do not parse the right hand side
			// of the definition here. Hence FORMULA_PREFIX has no further
			// meaning and substitutions can also be handled by the lexer.
			final Reader reader = new StringReader(BParser.FORMULA_PREFIX + "\n" + rhsToken.getText());

			final BLexer lexer = new BLexer(new PushbackReader(reader, BLexer.PUSHBACK_BUFFER_SIZE),
					new DefinitionTypes());
			lexer.setParseOptions(parseOptions);
			Set set = new HashSet<>();
			Token next;
			try {
				next = lexer.next();
				while (!(next instanceof EOF)) {
					if (next instanceof TIdentifierLiteral) {
						TIdentifierLiteral id = (TIdentifierLiteral) next;
						String name;
						try {
							name = Utils.unquoteIdentifier(id.getText());
						} catch (IllegalArgumentException exc) {
							throw new PreParseException(rhsToken, exc.getMessage(), exc);
						}
						if (definitionNames.contains(name)) {
							set.add(name);
						}
					}
					next = lexer.next();
				}
			} catch (IOException e) {
				throw new PreParseException("Error while parsing", e);
			} catch (BLexerException e) {
				Token errorToken = e.getLastToken();
				correctErrorTokenPosition(nameToken, rhsToken, errorToken);
				throw new PreParseException(errorToken.getLine(), errorToken.getPos(), adjustErrorMessage(e.getRealMsg()), e);
			} catch (de.be4.classicalb.core.parser.lexer.LexerException e) {
				throw wrapLexerExceptionAndCorrectPosition(nameToken, rhsToken, e, e);
			}
			dependencies.put(nameToken.getText(), set);
		}
		return dependencies;
	}

	static class DefinitionType {
		Definitions.Type type;
		String errorMessage;
		Token errorToken;

		DefinitionType() {

		}

		DefinitionType(Definitions.Type t, Token n) {
			this.type = t;
			this.errorToken = n;
		}

		DefinitionType(Definitions.Type t) {
			this.type = t;
		}

		DefinitionType(String errorMessage, Token t) {
			this.errorMessage = errorMessage;
			this.errorToken = t;
		}
	}

	/**
	 * Try to determine the abstract type of the right-hand side of a definition,
	 * i. e. whether it's an expression, a predicate, or a substitution.
	 * If the right-hand side references other definitions,
	 * it may not be possible to determine this definition's type yet
	 * if the types of the other definitions aren't known yet.
	 * For such cases,
	 * {@link #evaluateTypes(List, Map)} calls this method repeatedly until the type can be successfully determined.
	 * 
	 * @param definition the definition name token
	 * @param rhsToken the right-hand side of the definition (as a single token, merged by the {@link PreLexer})
	 * @param untypedDefinitions names of all definitions whose types haven't been determined yet
	 * @return the type of the definition's right-hand side, or error information if the type cannot be determined yet
	 *     (but it's expected that the type can be determined later, once some other definitions' types are known)
	 * @throws PreParseException if the definition's right-hand side couldn't be parsed
	 *     (and the parse error is not expected to go away later, even after more definitions' types are known) 
	 */
	private DefinitionType determineType(TPreParserIdentifier definition, TRhsBody rhsToken,
			final Set untypedDefinitions) throws PreParseException {

		final String definitionRhs = rhsToken.getText();

		Token errorToken;
		try {
			// Try parsing the RHS as a Formula, i.e., either expression or predicate
			PParseUnit parseunit = tryParsing(BParser.FORMULA_PREFIX, definitionRhs);

			// check if the result is a Predicate?
			if (parseunit instanceof APredicateParseUnit) {
				return new DefinitionType(IDefinitions.Type.Predicate);
			}

			// check if we have definitely an Expression or an ambiguous Expression/Substitution (e.g. f(x))?

			AExpressionParseUnit expressionParseUnit = (AExpressionParseUnit) parseunit;

			PreParserIdentifierTypeVisitor visitor = new PreParserIdentifierTypeVisitor(untypedDefinitions);
			expressionParseUnit.apply(visitor);

			if (visitor.isUntypedDefinitionUsed()) {
				// the parseunit uses another definition which is not yet typed
				return new DefinitionType();
			}

			PExpression expression = expressionParseUnit.getExpression();
			if ((expression instanceof AIdentifierExpression) || (expression instanceof AFunctionExpression)
					|| (expression instanceof ADefinitionExpression)) {
				return new DefinitionType(IDefinitions.Type.ExprOrSubst);
			}

			return new DefinitionType(IDefinitions.Type.Expression);

		} catch (de.be4.classicalb.core.parser.parser.ParserException e) {
			errorToken = e.getToken();
			try {
			    // try parsing the RHS now as a substitution:
				tryParsing(BParser.SUBSTITUTION_PREFIX, definitionRhs);
				return new DefinitionType(IDefinitions.Type.Substitution, errorToken);
			} catch (de.be4.classicalb.core.parser.parser.ParserException ex) {
				Token errorToken2 = ex.getToken();
				if (errorToken.getLine() > errorToken2.getLine() || (errorToken.getLine() == errorToken2.getLine()
						&& errorToken.getPos() >= errorToken2.getPos())) {
					// use error message from Substitution
					correctErrorTokenPosition(definition, rhsToken, errorToken);
					return new DefinitionType(adjustErrorMessage(e.getRealMsg()), errorToken);
				} else {
					// use error message from Expression/Predicate parsing:
					correctErrorTokenPosition(definition, rhsToken, errorToken2);
					return new DefinitionType(adjustErrorMessage(ex.getRealMsg()), errorToken2);
				}
			} catch (BLexerException e1) {
				errorToken = e1.getLastToken();
				correctErrorTokenPosition(definition, rhsToken, errorToken);
				throw new PreParseException(errorToken.getLine(), errorToken.getPos(), adjustErrorMessage(e.getRealMsg()), e);
			} catch (de.be4.classicalb.core.parser.lexer.LexerException e3) {
				// FIXME Is the cause really supposed to be different here?
				throw wrapLexerExceptionAndCorrectPosition(definition, rhsToken, e3, e);
			} catch (IOException e1) {
				throw new PreParseException(e.toString(), e);
			}
		} catch (BLexerException e) {
			errorToken = e.getLastToken();
			correctErrorTokenPosition(definition, rhsToken, errorToken);
			throw new PreParseException(errorToken.getLine(), errorToken.getPos(), adjustErrorMessage(e.getRealMsg()), e);
		} catch (de.be4.classicalb.core.parser.lexer.LexerException e) {
			throw wrapLexerExceptionAndCorrectPosition(definition, rhsToken, e, e);
		} catch (IOException e) {
			throw new PreParseException(e.toString(), e);
		}

	}

	private static void correctErrorTokenPosition(
		TPreParserIdentifier definition,
		TRhsBody rhsToken,
		Token errorToken
	) {
		// the parsed string starts in the second line, e.g. #formula\n ...
		int line = errorToken.getLine();
		int pos = errorToken.getPos();
		pos = line == 2 ? rhsToken.getPos() + pos - 1 : pos;
		line = definition.getLine() + line - 2;
		errorToken.setLine(line);
		errorToken.setPos(pos);
	}

	private static String adjustErrorMessage(String message) {
		if (message.contains("expecting: EOF")) {
			return "expecting end of definition";
		} else {
			return message.replace("the end of file", "the end of definition");
		}
	}

	private static PreParseException wrapLexerExceptionAndCorrectPosition(
		TPreParserIdentifier definition,
		TRhsBody rhsToken,
		de.be4.classicalb.core.parser.lexer.LexerException exc,
		Throwable cause
	) {
		// the parsed string starts in the second line, e.g. #formula\n ...
		int line = exc.getLine();
		int pos = exc.getPos();
		pos = line == 2 ? rhsToken.getPos() + pos - 1 : pos;
		line = definition.getLine() + line - 2;
		return new PreParseException(line, pos, exc.getRealMsg(), cause);
	}

	private PParseUnit tryParsing(final String prefix, final String definitionRhs)
			throws de.be4.classicalb.core.parser.lexer.LexerException,
			de.be4.classicalb.core.parser.parser.ParserException, IOException {

		final Reader reader = new StringReader(prefix + "\n" + definitionRhs);
		final BLexer lexer = new BLexer(new PushbackReader(reader, BLexer.PUSHBACK_BUFFER_SIZE), this.definitionTypes);
		lexer.setParseOptions(parseOptions);
		final de.be4.classicalb.core.parser.parser.Parser parser = new de.be4.classicalb.core.parser.parser.Parser(lexer);
		return parser.parse().getPParseUnit();
	}

	public IDefinitions getDefFileDefinitions() {
		return defFileDefinitions;
	}

	public DefinitionTypes getDefinitionTypes() {
		return this.definitionTypes;
	}

}