// All downloads are free. Search and download functionality uses the official Maven repository.

// org.textmapper.tool.compiler.TMLexerCompiler Maven / Gradle / Ivy

// There is a newer version: 0.9.5
// Show newest version
/**
 * Copyright 2002-2020 Evgeny Gryaznov
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.textmapper.tool.compiler;

import org.textmapper.lapg.LapgCore;
import org.textmapper.lapg.api.*;
import org.textmapper.lapg.api.ast.AstRawType;
import org.textmapper.lapg.api.ast.AstType;
import org.textmapper.lapg.api.builder.GrammarBuilder;
import org.textmapper.lapg.api.regex.RegexContext;
import org.textmapper.lapg.api.regex.RegexMatcher;
import org.textmapper.lapg.api.regex.RegexParseException;
import org.textmapper.lapg.api.regex.RegexPart;
import org.textmapper.tool.parser.TMTree;
import org.textmapper.tool.parser.ast.*;

import java.util.*;

/**
 * Turns the lexer part of a parsed grammar into lexer rules registered with the
 * {@link GrammarBuilder} obtained from the resolver.
 * <p>
 * All work happens in {@link #compile()}: the states of every lexeme are collected first,
 * then class lexemes are registered together with a matcher for their pattern, and finally
 * the remaining lexemes are registered, each linked to the class rule matching it (if any).
 * <p>
 * evgeny, 1/21/13
 */
public class TMLexerCompiler {

	private final TMResolver resolver;
	private final GrammarBuilder builder;

	/** States every collected lexeme applies in; filled by {@link #collectAttributes}. */
	private final Map<TmaLexeme, RuleAttributes> attributes = new HashMap<>();

	/**
	 * @param resolver source of the syntax tree, symbols, lexer states and the grammar builder
	 */
	public TMLexerCompiler(TMResolver resolver) {
		this.resolver = resolver;
		this.builder = resolver.getBuilder();
	}

	/** Reports a problem attached to the given node through the resolver. */
	private void error(ITmaNode n, String message) {
		resolver.error(n, message);
	}

	/**
	 * Resolves the state references of a start-conditions clause.
	 * <p>
	 * References that cannot be resolved are reported as errors and skipped.
	 *
	 * @param conditions the clause to resolve
	 * @return the resolved states; all states known to the resolver when the clause has no
	 *         reference list or when none of its references could be resolved
	 */
	private List<LexerState> resolveStates(TmaStartConditions conditions) {
		List<TmaStateref> refs = conditions.getStaterefListCommaSeparated();
		if (refs == null) {
			return resolver.allStates();
		}
		List<LexerState> result = new ArrayList<>();
		for (TmaStateref ref : refs) {
			LexerState applicable = resolver.getState(ref.getName().getText());
			if (applicable != null) {
				result.add(applicable);
			} else {
				error(ref, ref.getName() + " cannot be resolved");
			}
		}
		if (result.isEmpty()) {
			result.addAll(resolver.allStates());
		}
		return result;
	}

	/**
	 * Finds the class rule whose matcher accepts the constant value of {@code regex}.
	 * <p>
	 * Only constant, non-class lexemes are considered, and only class rules that apply in
	 * the same set of states as {@code l}. When several class rules match, the first one
	 * (in the iteration order of {@code classMatchers}) wins and an error is reported for
	 * each additional match.
	 *
	 * @param classMatchers registered class rules with the matchers built from their patterns
	 * @param l             the lexeme being compiled
	 * @param regex         the parsed pattern of {@code l}
	 * @return the matching class rule, or {@code null} if there is none
	 */
	private LexerRule getClassRule(Map<LexerRule, RegexMatcher> classMatchers, TmaLexeme l,
								   RegexPart regex) {
		TmaLexemeAttrs attrs = l.getAttrs();
		boolean isClass = attrs != null && attrs.getKind() == TmaLexemeAttribute.CLASS;
		if (!regex.isConstant() || isClass) {
			return null;
		}

		LexerRule result = null;
		for (Map.Entry<LexerRule, RegexMatcher> entry : classMatchers.entrySet()) {
			LexerRule rule = entry.getKey();
			// Class rules are created in compile() with their lexeme as the origin.
			TmaLexeme astClassLexeme = (TmaLexeme) ((DerivedSourceElement) rule).getOrigin();
			if (!attributes.get(astClassLexeme).canBeClassFor(attributes.get(l))) {
				continue;
			}
			if (entry.getValue().matches(regex.getConstantValue())) {
				if (result != null) {
					error(l, "regex matches two classes `" + result.getSymbol().getNameText() +
							"' and `" + rule.getSymbol().getNameText() + "', using first");
				} else {
					result = rule;
				}
			}
		}
		return result;
	}

	/**
	 * Maps lexeme attributes onto a {@code LexerRule.KIND_*} constant.
	 *
	 * @param attr lexeme attributes, may be {@code null}
	 * @return the rule kind; {@link LexerRule#KIND_NONE} when {@code attr} is {@code null}
	 *         or its kind is none of class, layout or space
	 */
	public int getLexerRuleKind(TmaLexemeAttrs attr) {
		if (attr == null) {
			return LexerRule.KIND_NONE;
		}
		switch (attr.getKind()) {
			case CLASS:
				return LexerRule.KIND_CLASS;
			case LAYOUT:
				return LexerRule.KIND_LAYOUT;
			case SPACE:
				return LexerRule.KIND_SPACE;
			default:
				return LexerRule.KIND_NONE;
		}
	}

	/**
	 * Records the applicable states of every lexeme reachable from {@code part}.
	 * <p>
	 * A lexeme with its own start conditions uses those instead of {@code states}. A
	 * start-conditions scope resolves its conditions and applies them to all nested parts.
	 * Any other kind of lexer part is ignored.
	 *
	 * @param states states inherited from the enclosing context
	 * @param part   lexer part to process
	 */
	private void collectAttributes(List<LexerState> states, ITmaLexerPart part) {
		if (part instanceof TmaLexeme) {
			TmaLexeme lexeme = (TmaLexeme) part;
			TmaStartConditions conditions = lexeme.getStartConditions();
			List<LexerState> effective = conditions != null ? resolveStates(conditions) : states;
			attributes.put(lexeme, new RuleAttributes(effective));
		} else if (part instanceof TmaStartConditionsScope) {
			TmaStartConditionsScope scope = (TmaStartConditionsScope) part;
			List<LexerState> scopeStates = resolveStates(scope.getStartConditions());
			for (ITmaLexerPart p : scope.getLexerParts()) {
				collectAttributes(scopeStates, p);
			}
		}
	}

	/**
	 * Compiles all lexemes of the grammar into lexer rules.
	 * <p>
	 * Problems (invalid regular expressions, class lexemes without a pattern, rules that
	 * apply in no state) are reported through the resolver and the offending lexeme is
	 * skipped.
	 */
	public void compile() {
		// Step 1. Collect states.

		List<LexerState> defaultStates = resolver.inclusiveStates();
		// The tree is read through the resolver so that the type of its root node is
		// taken from TMResolver.getTree() instead of being repeated here.
		for (ITmaLexerPart clause : resolver.getTree().getRoot().getLexer()) {
			collectAttributes(defaultStates, clause);
		}

		// Step 2. Process class lexical rules.

		RegexContext context = resolver.createRegexContext();
		// Insertion-ordered, so "using first" in getClassRule() follows declaration order.
		Map<LexerRule, RegexMatcher> classMatchers = new LinkedHashMap<>();

		// order counts every lexeme (not only class ones), in both passes.
		int order = 0;
		for (TmaLexeme lexeme : resolver.getLexerParts(TmaLexeme.class)) {
			order++;
			TmaLexemeAttrs attrs = lexeme.getAttrs();
			if (attrs == null || attrs.getKind() != TmaLexemeAttribute.CLASS) {
				continue;
			}
			if (lexeme.getPattern() == null) {
				error(lexeme, "class lexeme rule without regular expression, ignored");
				continue;
			}

			Symbol s = resolver.getSymbol(lexeme.getName().getText());
			if (!(s instanceof Terminal)) {
				// not a terminal? already reported, ignore
				continue;
			}
			Terminal classTerm = (Terminal) s;

			RegexPart regex;
			RegexMatcher matcher;
			try {
				regex = LapgCore.parse(s.getNameText(), lexeme.getPattern().getRegexp());
				matcher = LapgCore.createMatcher(regex, context);
			} catch (RegexParseException e) {
				error(lexeme.getPattern(), e.getMessage());
				continue;
			}

			int priority = lexeme.getPriority() == null ? 0 : lexeme.getPriority();
			List<LexerState> states = attributes.get(lexeme).getApplicableInStates();
			if (states.isEmpty()) {
				error(lexeme, "lexer rule is never applicable, ignored");
				continue;
			}

			LexerRule liLexerRule = builder.addLexerRule(LexerRule.KIND_CLASS, classTerm, regex,
					states, priority, order, null, lexeme);
			classMatchers.put(liLexerRule, matcher);
			TMDataUtil.putCodeTemplate(liLexerRule, lexeme.getCommand());
		}

		// Step 3. Process other lexical rules.

		order = 0;
		for (TmaLexeme lexeme : resolver.getLexerParts(TmaLexeme.class)) {
			order++;
			TmaLexemeAttrs attrs = lexeme.getAttrs();
			int kind = getLexerRuleKind(attrs);
			if (kind == LexerRule.KIND_CLASS) {
				// handled in step 2
				continue;
			}

			Symbol s = resolver.getSymbol(lexeme.getName().getText());
			if (!(s instanceof Terminal)) {
				// not a terminal? already reported, ignore
				continue;
			}
			Terminal term = (Terminal) s;
			if (lexeme.getPattern() == null) {
				continue;
			}

			String name = lexeme.getName().getText();
			RegexPart regex;
			try {
				regex = LapgCore.parse(name, lexeme.getPattern().getRegexp());
			} catch (RegexParseException e) {
				error(lexeme.getPattern(), e.getMessage());
				continue;
			}

			LexerRule classRule = getClassRule(classMatchers, lexeme, regex);
			int priority = lexeme.getPriority() == null ? 0 : lexeme.getPriority();
			List<LexerState> states = attributes.get(lexeme).getApplicableInStates();
			if (states.isEmpty()) {
				error(lexeme, "lexer rule is never applicable, ignored");
				continue;
			}

			LexerRule liLexerRule = builder.addLexerRule(kind, term, regex,
					states, priority, order, classRule, lexeme);
			TMDataUtil.putCodeTemplate(liLexerRule, lexeme.getCommand());
		}
	}

	/**
	 * Per-lexeme data gathered before rules are created: the states the lexeme applies in.
	 */
	private static class RuleAttributes {

		private final List<LexerState> applicableInStates;

		public RuleAttributes(List<LexerState> applicableInStates) {
			this.applicableInStates = applicableInStates;
		}

		public List<LexerState> getApplicableInStates() {
			return applicableInStates;
		}

		/**
		 * Tells whether a class rule with these attributes may act as the class of a rule
		 * with attributes {@code l}: both state lists must have the same size and this
		 * list must contain every state of {@code l}.
		 */
		public boolean canBeClassFor(RuleAttributes l) {
			if (applicableInStates.size() != l.getApplicableInStates().size()) {
				return false;
			}
			// For longer lists a hash set is used for the containment check.
			Collection<LexerState> applicableInStatesSet = applicableInStates.size() > 4
					? new HashSet<>(applicableInStates) : applicableInStates;

			return applicableInStatesSet.containsAll(l.getApplicableInStates());
		}

	}
}




// © 2015 - 2025 Weber Informatics LLC | Privacy Policy