All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.textmapper.tool.compiler.TMLexerCompiler Maven / Gradle / Ivy

There is a newer version: 0.10.0
Show newest version
/**
 * Copyright 2002-2016 Evgeny Gryaznov
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.textmapper.tool.compiler;

import org.textmapper.lapg.LapgCore;
import org.textmapper.lapg.api.*;
import org.textmapper.lapg.api.ast.AstRawType;
import org.textmapper.lapg.api.ast.AstType;
import org.textmapper.lapg.api.builder.GrammarBuilder;
import org.textmapper.lapg.api.regex.RegexContext;
import org.textmapper.lapg.api.regex.RegexMatcher;
import org.textmapper.lapg.api.regex.RegexParseException;
import org.textmapper.lapg.api.regex.RegexPart;
import org.textmapper.tool.parser.TMTree;
import org.textmapper.tool.parser.ast.*;

import java.util.*;

/**
 * evgeny, 1/21/13
 */
public class TMLexerCompiler {

	private final TMTree tree;
	private final TMResolver resolver;
	private final GrammarBuilder builder;

	private final Map attributes =
			new HashMap<>();

	public TMLexerCompiler(TMResolver resolver) {
		this.resolver = resolver;
		this.tree = resolver.getTree();
		this.builder = resolver.getBuilder();
	}

	private void error(ITmaNode n, String message) {
		resolver.error(n, message);
	}

	private List convertApplicableStates(TmaStateSelector selector) {
		List result = new ArrayList<>();
		for (TmaLexerState state : selector.getStates()) {
			LexerState applicable = resolver.getState(state.getName().getID());
			result.add(applicable);
		}
		return result;
	}

	private TMStateTransitionSwitch convertTransitions(TmaStateSelector selector) {
		boolean noDefault = false;
		for (TmaLexerState state : selector.getStates()) {
			if (state.getDefaultTransition() == null) {
				noDefault = true;
			}
		}

		LexerState defaultTransition = null;
		Map stateSwitch = new LinkedHashMap<>();
		for (TmaLexerState state : selector.getStates()) {
			if (state.getDefaultTransition() == null) {
				continue;
			}
			String targetName = state.getDefaultTransition().getName();
			LexerState target = resolver.getState(targetName);
			if (target == null) {
				error(state.getDefaultTransition(), targetName + " cannot be resolved");
				continue;
			}

			if (defaultTransition == null && !(noDefault)) {
				defaultTransition = target;
			} else if (defaultTransition != target) {
				LexerState source = resolver.getState(state.getName().getID());
				stateSwitch.put(source, target);
			}
		}
		return stateSwitch.isEmpty() && defaultTransition == null ? null :
				new TMStateTransitionSwitch(stateSwitch.isEmpty() ? null : stateSwitch,
						defaultTransition);
	}

	private TMStateTransitionSwitch getTransition(TmaLexeme lexeme,
												  TMStateTransitionSwitch active) {
		TmaStateref transition = lexeme.getTransition();
		if (transition != null) {
			String targetName = transition.getName();
			LexerState target = resolver.getState(targetName);
			if (target == null) {
				error(transition, targetName + " cannot be resolved");
			} else {
				return new TMStateTransitionSwitch(target);
			}
		}
		return active;
	}

	private LexerRule getClassRule(Map classMatchers, TmaLexeme l,
								   RegexPart regex) {
		LexerRule result = null;
		TmaLexemeAttrs attrs = l.getAttrs();
		boolean isClass = attrs != null && attrs.getKind() == TmaLexemeAttribute.LCLASS;
		if (regex.isConstant() && !isClass) {
			for (LexerRule rule : classMatchers.keySet()) {
				TmaLexeme astClassLexeme = (TmaLexeme) ((DerivedSourceElement) rule).getOrigin();
				if (!attributes.get(astClassLexeme).canBeClassFor(attributes.get(l))) {
					continue;
				}
				RegexMatcher m = classMatchers.get(rule);
				if (m.matches(regex.getConstantValue())) {
					if (result != null) {
						error(l, "regex matches two classes `" + result.getSymbol().getNameText() +
								"' and `" + rule.getSymbol().getNameText() + "', using first");
					} else {
						result = rule;
					}
				}
			}
		}
		return result;
	}

	public int getLexerRuleKind(TmaLexemeAttrs attr) {
		if (attr == null) {
			return LexerRule.KIND_NONE;
		}
		switch (attr.getKind()) {
			case LCLASS:
				return LexerRule.KIND_CLASS;
			case LLAYOUT:
				return LexerRule.KIND_LAYOUT;
			case LSOFT:
				return LexerRule.KIND_SOFT;
			case LSPACE:
				return LexerRule.KIND_SPACE;
		}

		return LexerRule.KIND_NONE;
	}

	public void compile() {
		Map softToClass = new HashMap<>();
		Set nonSoft = new HashSet<>();

		// Step 1. Collect states & transitions (attributes).

		TMStateTransitionSwitch activeTransitions = null;
		List activeStates = Collections.singletonList(resolver.getState(
				TMResolver.INITIAL_STATE.text()));

		for (ITmaLexerPart clause : tree.getRoot().getLexer()) {
			if (clause instanceof TmaLexeme) {
				TmaLexeme lexeme = (TmaLexeme) clause;
				attributes.put(lexeme, new RuleAttributes(getTransition(lexeme, activeTransitions),
						activeStates));
			} else if (clause instanceof TmaStateSelector) {
				activeStates = convertApplicableStates((TmaStateSelector) clause);
				activeTransitions = convertTransitions((TmaStateSelector) clause);
			}
		}

		// Step 2. Process class lexical rules.

		RegexContext context = resolver.createRegexContext();
		Map classMatchers = new LinkedHashMap<>();

		for (ITmaLexerPart clause : tree.getRoot().getLexer()) {
			if (!(clause instanceof TmaLexeme)) {
				continue;
			}
			TmaLexeme lexeme = (TmaLexeme) clause;
			TmaLexemeAttrs attrs = lexeme.getAttrs();
			if (attrs == null || attrs.getKind() != TmaLexemeAttribute.LCLASS) {
				continue;
			}
			if (lexeme.getPattern() == null) {
				error(lexeme, "class lexeme rule without regular expression, ignored");
				continue;
			}

			Symbol s = resolver.getSymbol(lexeme.getName().getID());
			if (!(s instanceof Terminal)) {
				// not a terminal? already reported, ignore
				continue;
			}
			Terminal classTerm = (Terminal) s;
			nonSoft.add(classTerm);

			RegexPart regex;
			RegexMatcher matcher;
			try {
				regex = LapgCore.parse(s.getNameText(), lexeme.getPattern().getRegexp());
				matcher = LapgCore.createMatcher(regex, context);
			} catch (RegexParseException e) {
				error(lexeme.getPattern(), e.getMessage());
				continue;
			}

			int priority = lexeme.getPriority() == null ? 0 : lexeme.getPriority();
			LexerRule liLexerRule = builder.addLexerRule(LexerRule.KIND_CLASS, classTerm, regex,
					attributes.get(lexeme).getApplicableInStates(), priority, null, lexeme);
			classMatchers.put(liLexerRule, matcher);
			TMDataUtil.putCode(liLexerRule, lexeme.getCommand());
			TMDataUtil.putTransition(liLexerRule, attributes.get(lexeme).getTransitions());
		}

		// Step 3. Process other lexical rules. Match soft lexemes with their classes.

		for (ITmaLexerPart clause : tree.getRoot().getLexer()) {
			if (!(clause instanceof TmaLexeme)) {
				continue;
			}
			TmaLexeme lexeme = (TmaLexeme) clause;
			TmaLexemeAttrs attrs = lexeme.getAttrs();
			int kind = getLexerRuleKind(attrs);
			if (kind == LexerRule.KIND_CLASS) {
				continue;
			}

			Symbol s = resolver.getSymbol(lexeme.getName().getID());
			if (!(s instanceof Terminal)) {
				// not a terminal? already reported, ignore
				continue;
			}
			Terminal term = (Terminal) s;

			boolean isSoft = (kind == LexerRule.KIND_SOFT);
			if (isSoft && nonSoft.contains(term)) {
				error(lexeme, "redeclaration of non-soft terminal: " + lexeme.getName());
				continue;
			} else if (!isSoft) {
				if (softToClass.containsKey(term)) {
					error(lexeme, "redeclaration of soft terminal: " + lexeme.getName());
					continue;
				}
				nonSoft.add(term);
			}

			if (lexeme.getPattern() == null) {
				if (isSoft) {
					error(lexeme, "soft lexeme rule `" + lexeme.getName().getID() +
							"' should have a regular expression");
				}
				continue;
			}

			String name = lexeme.getName().getID();
			RegexPart regex;
			try {
				regex = LapgCore.parse(name, lexeme.getPattern().getRegexp());
			} catch (RegexParseException e) {
				error(lexeme.getPattern(), e.getMessage());
				continue;
			}

			if (isSoft && lexeme.getCommand() != null) {
				// TODO Note: soft lexeme is able to override the code
				error(lexeme.getCommand(), "soft lexeme rule `" + lexeme.getName().getID() +
						"' cannot have a semantic action");
			}
			LexerRule classRule = getClassRule(classMatchers, lexeme, regex);
			if (isSoft) {
				if (classRule == null) {
					error(lexeme, "soft lexeme rule `" + name + "' " +
							(regex.isConstant() ? "doesn't match any class rule" :
									"should have a constant regexp"));
					continue;
				}
				Terminal softClass = classRule.getSymbol();

				String type = lexeme.getType();
				String classtype = getSymbolType(softClass);
				if (type != null && !type.equals(classtype)) {
					error(lexeme, "soft terminal `" + name + "' overrides base type: expected `" +
							(classtype == null ? "" : classtype) + "', found `" + type +
							"'");
					continue;
				}

				final Terminal oldClass = softToClass.get(term);
				if (oldClass != null && oldClass != softClass) {
					error(lexeme, "redeclaration of soft class for `" + term.getNameText() +
							"': found " + softClass.getNameText() + " instead of " +
							oldClass.getNameText());
					continue;
				} else if (oldClass == null) {
					builder.makeSoft(term, softClass);
					softToClass.put(term, softClass);
				}

				// TODO check applicable states
			}

			int priority = lexeme.getPriority() == null ? 0 : lexeme.getPriority();
			LexerRule liLexerRule = builder.addLexerRule(kind, term, regex,
					attributes.get(lexeme).getApplicableInStates(), priority, classRule, lexeme);
			TMDataUtil.putCode(liLexerRule, lexeme.getCommand());
			TMDataUtil.putTransition(liLexerRule, attributes.get(lexeme).getTransitions());
		}
	}

	private static String getSymbolType(Symbol s) {
		final AstType type = s.getType();
		return type instanceof AstRawType ? ((AstRawType) type).getRawType() : null;
	}

	private static class RuleAttributes {

		private final TMStateTransitionSwitch transitions;
		private final List applicableInStates;

		public RuleAttributes(TMStateTransitionSwitch transitions,
							  List applicableInStates) {
			this.transitions = transitions;
			this.applicableInStates = applicableInStates;
		}

		public TMStateTransitionSwitch getTransitions() {
			return transitions;
		}

		public List getApplicableInStates() {
			return applicableInStates;
		}

		public boolean canBeClassFor(RuleAttributes l) {
			if (applicableInStates.size() != l.getApplicableInStates().size()) {
				return false;
			}
			Collection applicableInStatesSet = applicableInStates.size() > 4
					? new HashSet<>(applicableInStates) : applicableInStates;
			if (!(applicableInStatesSet.containsAll(l.getApplicableInStates()))) {
				return false;
			}
			return this.transitions == null ? l.getTransitions() == null :
					this.transitions.equals(l.getTransitions());
		}

	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy