All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.textmapper.lapg.lalr.Builder Maven / Gradle / Ivy

/**
 * Copyright 2002-2020 Evgeny Gryaznov
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.textmapper.lapg.lalr;

import org.textmapper.lapg.api.*;

import java.util.*;

public class Builder extends Lalr1 {

	/**
	 * Creates a builder for the given grammar. Both arguments are handed to the
	 * {@link Lalr1} constructor unchanged; instances are only created from
	 * {@link #compile(Grammar, ProcessingStatus)}.
	 *
	 * @param g the grammar to generate parser tables for
	 * @param status passed to the superclass; presumably the same object later used
	 *               via {@code status.report(...)} for warnings and errors
	 */
	private Builder(Grammar g, ProcessingStatus status) {
		super(g, status);
	}

	// tables
	// Per-state action selector, filled by action(). For LR0 states it holds the first
	// reduce rule of the state, -1 when the state only has shifts, or -2 when it has
	// neither. For every other state it holds -3 - offset, where offset is the position
	// of the state's entries inside action_table.
	private int[] action_index;
	// Total number of ints stored in action_table.
	private int nactions;
	// Concatenation of the per-state lists of (terminal, action) pairs; every list is
	// terminated by the pair (-1, -2).
	private int[] action_table;
	// Creates and tracks resolution rules for conflicting lookaheads; instantiated in generate().
	private ExplicitLookaheadBuilder lookaheadBuilder;

	/**
	 * Inspects the grammar before the automaton is built.
	 * <p>
	 * First it completes {@code sym_empty}: a left-hand side is marked as soon as one of
	 * its rules has only marked symbols on the right. Then it computes two more symbol
	 * sets - "good" symbols (terminals, symbols from {@code sym_empty}, and left-hand
	 * sides of a rule made of good symbols only) and "employed" symbols (those reachable
	 * from {@code inputs} through rules). Every symbol missing from either set is reported
	 * as a warning, unless its name starts with {@code _skip}.
	 */
	private void verify_grammar() {
		// TODO use sym[i].isNullable to initialize sym_empty

		// Find the symbols that accept the empty chain. After every newly marked symbol
		// the scan restarts from the first rule; it ends after a full pass without marks.
		boolean markedEmpty = true;
		while (markedEmpty) {
			markedEmpty = false;
			for (int rule = 0; rule < rules; rule++) {
				int left = rleft[rule];
				if (sym_empty[left]) {
					continue;
				}
				// Skip over the leading run of empty symbols; the right part is
				// terminated by a negative entry.
				int pos = rindex[rule];
				while (rright[pos] >= 0 && sym_empty[rright[pos]]) {
					pos++;
				}
				if (rright[pos] < 0) {
					sym_empty[left] = true;
					markedEmpty = true;
					break;
				}
			}
		}

		// Cross-check the computed flags against the ones stored in the grammar model.
		for (int s = nterms; s < nsyms; s++) {
			assert sym_empty[s] == ((Nonterminal) sym[s]).isNullable()
					: "old = " + sym_empty[s] + ", new = " + ((Nonterminal) sym[s]).isNullable() +
					" for " + sym[s].getNameText();
		}

		// Terminals and empty symbols are good from the start.
		boolean[] good = new boolean[nsyms];
		for (int s = 0; s < nsyms; s++) {
			good[s] = sym[s].isTerm() || sym_empty[s];
		}

		// Propagate "good" to left-hand sides, restarting after each new mark.
		boolean markedGood = true;
		while (markedGood) {
			markedGood = false;
			for (int rule = 0; rule < rules; rule++) {
				int left = rleft[rule];
				if (good[left]) {
					continue;
				}
				int pos = rindex[rule];
				while (rright[pos] >= 0 && good[rright[pos]]) {
					pos++;
				}
				if (rright[pos] < 0) {
					good[left] = true;
					markedGood = true;
					break;
				}
			}
		}

		// Collect the employed symbols, starting from the input symbols.
		boolean[] employed = new boolean[nsyms];
		boolean[] queued = new boolean[nsyms];
		for (int n = 0; n < inputs.length; n++) {
			queued[inputs[n]] = true;
		}
		boolean morePending = true;
		while (morePending) {
			morePending = false;
			for (int s = 0; s < nsyms; s++) {
				if (!queued[s]) {
					continue;
				}
				for (int rule = 0; rule < rules; rule++) {
					if (rleft[rule] != s) {
						continue;
					}
					for (int pos = rindex[rule]; rright[pos] >= 0; pos++) {
						int used = rright[pos];
						if (queued[used] || employed[used]) {
							continue;
						}
						if (sym[used].isTerm()) {
							// Terminals have no rules to follow, mark them right away.
							employed[used] = true;
						} else {
							morePending = true;
							queued[used] = true;
						}
					}
				}

				employed[s] = true;
				queued[s] = false;
			}
		}

		// eoi is always considered useful
		good[eoi] = employed[eoi] = true;

		// Report the useless symbols.
		for (int s = 0; s < nsyms; s++) {
			if (good[s] && employed[s]) {
				continue;
			}
			String name = sym[s].getNameText();
			if (name.startsWith("_skip")) {
				continue;
			}
			status.report(ProcessingStatus.KIND_WARN, "symbol `" + name
					+ "` is useless", sym[s]);
		}
	}

	/**
	 * Tries to settle a shift/reduce choice between reducing {@code rule} and shifting
	 * the terminal {@code next} using the {@code priorul} table.
	 * <p>
	 * {@code priorul} is read as a sequence of groups: a negative entry opens a new group
	 * and carries its associativity as the negated value, non-negative entries are the
	 * members of the current group. The group of {@code rprio[rule]} is compared with the
	 * group of {@code next}; when both are in the same group, the associativity recorded
	 * for {@code next} decides (1 = left, 2 = right, 3 = nonassoc).
	 *
	 * @param rule index of the rule that could be reduced
	 * @param next index of the terminal that could be shifted
	 * @return 0: unresolved, 1: shift, 2: reduce, 3: error
	 */
	private int compare_prio(int rule, int next) {
		if (priorul.length == 0) {
			return 0;
		}

		int group = 0;
		int groupAssoc = -1;
		int ruleGroup = -1;
		int nextGroup = -1;
		int nextAssoc = -1;
		for (int entry : priorul) {
			if (entry < 0) {
				// group header
				groupAssoc = -entry;
				group++;
				continue;
			}
			if (entry == rprio[rule]) {
				ruleGroup = group;
			}
			if (entry == next) {
				nextGroup = group;
				nextAssoc = groupAssoc;
			}
		}

		if (ruleGroup == -1 || nextGroup == -1) {
			return 0;
		}
		if (ruleGroup != nextGroup) {
			// the later group wins: reduce when it is the rule's, shift otherwise
			return ruleGroup > nextGroup ? 2 : 1;
		}
		switch (nextAssoc) {
			case 1:
				return 2; // left => reduce
			case 2:
				return 1; // right => shift
			case 3:
				return 3; // nonassoc => error
			default:
				return 0;
		}
	}

	/**
	 * Fills {@code action_index}, {@code action_table} and {@code nactions} for all states
	 * and reports parser conflicts.
	 * <p>
	 * LR0 states get their answer directly in {@code action_index}: the first reduce rule
	 * of the state, -1 when the state has shifts only, or -2 when it has neither. For
	 * every other state a list of (terminal, action) pairs terminated by (-1, -2) is
	 * appended to the action table and {@code action_index} receives
	 * {@code -3 - offset} of that list.
	 * <p>
	 * While a state is processed, {@code next[term]} holds -2 (nothing recorded yet),
	 * -1 (shift), -3 (non-associative syntax error, set by
	 * {@link #addReduce(int[], int, int, ConflictBuilder)}) or the index of the rule to
	 * reduce.
	 * <p>
	 * Conflicts of kind {@code FIXED} are reported immediately. All others are held back
	 * and reported, together with a summary error, only when the number of shift/reduce
	 * or reduce/reduce conflicts differs from {@code expectSR} / {@code expectRR}.
	 */
	private void action() {
		// Element types are spelled out so the for-each loops below type-check.
		List<int[]> actionTables = new ArrayList<>();
		int rr = 0, sr = 0;
		int[] actionset = new int[nterms];
		int[] next = new int[nterms];
		ConflictBuilder conflicts = new ConflictBuilder(nterms);
		List<ParserConflict> pending = new ArrayList<>();

		action_index = new int[nstates];
		action_table = null;
		nactions = 0;

		for (State t = first; t != null; t = t.next) {
			if (t.LR0) {
				if (t.nreduce > 0) {
					action_index[t.number] = t.reduce[0];
				} else if (t.nshifts > 0) {
					action_index[t.number] = -1;
				} else {
					action_index[t.number] = -2;
				}
			} else {

				// prepare
				int setsize = 0;
				Arrays.fill(next, -2);

				// process shifts
				for (int i = 0; i < t.nshifts; i++) {
					int termSym = state[t.shifts[i]].symbol;
					// NOTE(review): stops at the first non-terminal transition, so this
					// presumably relies on terminal shifts being listed first - confirm.
					if (termSym >= nterms) {
						break;
					}

					assert next[termSym] == -2;
					next[termSym] = -1;
					actionset[setsize++] = termSym;
				}

				// reset conflicts
				conflicts.clear();

				// process reduces: every lookahead entry of the state owns `termset`
				// words of LA, each word carrying BITS terminals as a bit mask
				for (int i = laindex[t.number]; i < laindex[t.number + 1]; i++) {
					int ai = i * termset;
					int max = ai + termset;
					int termSym = 0;
					for (; ai < max; ai++) {
						int bits = LA[ai];
						if (bits == 0) {
							// nothing set in this word, skip all of its terminals
							termSym += BITS;
						} else {
							for (int e = 0; e < BITS; e++) {
								if ((bits & (1 << e)) != 0) {
									if (next[termSym] == -2) {
										// OK
										next[termSym] = larule[i];
										actionset[setsize++] = termSym;
									} else {
										addReduce(next, termSym, larule[i], conflicts);
									}
								}
								termSym++;
							}
						}
					}
				}

				// merge conflicts
				for (ParserConflict conflict : conflicts.getMergedConflicts(t.number,
						getInput(t.number))) {
					if (conflict.getKind() == ParserConflict.FIXED) {
						status.report(conflict);
					} else {
						pending.add(conflict);
					}
					switch (conflict.getKind()) {
						case ParserConflict.REDUCE_REDUCE:
							rr += conflict.getSymbols().length;
							break;
						case ParserConflict.SHIFT_REDUCE:
							sr += conflict.getSymbols().length;
							break;
					}
				}

				// process non-assoc syntax errors: the temporary -3 marker is stored as -2
				for (int i = 0; i < nterms; i++) {
					if (next[i] == -3) {
						next[i] = -2;
					}
				}

				// insert into action_table
				int[] stateActions = new int[2 * (setsize + 1)];
				action_index[t.number] = -3 - nactions;
				int e = 0;
				for (int i = 0; i < setsize; i++) {
					stateActions[e++] = actionset[i];
					stateActions[e++] = next[actionset[i]];
				}
				// terminating pair
				stateActions[e++] = -1;
				stateActions[e++] = -2;
				actionTables.add(stateActions);
				nactions += stateActions.length;
			}
		}
		if (sr != expectSR || rr != expectRR) {
			for (ParserConflict c : pending) {
				status.report(c);
			}
			status.report(ProcessingStatus.KIND_ERROR, "conflicts: " + sr + " shift/reduce and "
					+ rr + " reduce/reduce");
		}

		// Concatenate the per-state tables in the order they were produced.
		int offset = 0;
		action_table = new int[nactions];
		for (int[] stateActions : actionTables) {
			System.arraycopy(stateActions, 0, action_table, offset, stateActions.length);
			offset += stateActions.length;
		}
	}

	/**
	 * Records a reduction by {@code rule} on the terminal {@code termSym} when
	 * {@code next[termSym]} is already occupied, and resolves or registers the resulting
	 * conflict.
	 * <ul>
	 * <li>If {@code builder} already has a conflict for the terminal, the rule is added to
	 * it as {@code CONFLICT}.</li>
	 * <li>If the slot holds -1 (shift), {@link #compare_prio(int, int)} decides: the
	 * reduction is registered as {@code CONFLICT}, {@code SHIFT}, {@code REDUCE} (the slot
	 * is overwritten with {@code rule}) or {@code SYNTAXERR} (the slot becomes -3).</li>
	 * <li>If the slot holds -3, the rule is registered as {@code CONFLICT}.</li>
	 * <li>Otherwise the slot holds another rule (reduce/reduce). When lookahead
	 * nonterminals are involved, the slot is redirected to a resolution rule obtained from
	 * {@code lookaheadBuilder} and nothing is reported; in all other cases a
	 * {@code CONFLICT} between both rules is registered.</li>
	 * </ul>
	 *
	 * @param next per-terminal action slots of the state being processed; may be updated
	 * @param termSym index of the lookahead terminal
	 * @param rule index of the rule to reduce
	 * @param builder collector for the conflicts of the current state
	 */
	private void addReduce(int[] next, int termSym, int rule, ConflictBuilder builder) {
		if (builder.hasConflict(termSym)) {
			builder.addReduce((Terminal) sym[termSym], ConflictBuilder.CONFLICT, wrules[rule],
					null);

		} else if (next[termSym] == -1) {
			switch (compare_prio(rule, termSym)) {
				case 0: // shift/reduce
					builder.addReduce((Terminal) sym[termSym], ConflictBuilder.CONFLICT,
							wrules[rule], null);
					break;
				case 1: // shift
					builder.addReduce((Terminal) sym[termSym], ConflictBuilder.SHIFT,
							wrules[rule], null);
					break;
				case 2: // reduce
					builder.addReduce((Terminal) sym[termSym], ConflictBuilder.REDUCE,
							wrules[rule], null);
					next[termSym] = rule;
					break;
				case 3: // error (non-assoc)
					builder.addReduce((Terminal) sym[termSym], ConflictBuilder.SYNTAXERR,
							wrules[rule], null);
					next[termSym] = -3;
					break;
			}
		} else if (next[termSym] == -3) {
			builder.addReduce((Terminal) sym[termSym], ConflictBuilder.CONFLICT, wrules[rule],
					null);
		} else {
			// reduce/reduce
			int prevRule = next[termSym];
			if (lookaheadBuilder.isResolutionRule(prevRule)) {
				if (sym[rleft[rule]] instanceof Lookahead) {
					// Updating the resolution rule to include a new lookahead.
					next[termSym] = lookaheadBuilder.addResolutionRule(prevRule,
							(Lookahead) sym[rleft[rule]]);
					return;
				} else {
					// Resolution rules are not part of the grammar, report one of the
					// original lookahead rules.
					prevRule = lookaheadBuilder.getRefRule(prevRule);
				}
			} else if (sym[rleft[rule]] instanceof Lookahead
					&& sym[rleft[prevRule]] instanceof Lookahead) {

				// Conflicting lookaheads need a new resolution rule. The set is typed
				// (not raw) and keeps insertion order: previous rule's lookahead first.
				Set<Lookahead> set = new LinkedHashSet<>();
				set.add((Lookahead) sym[rleft[prevRule]]);
				set.add((Lookahead) sym[rleft[rule]]);
				next[termSym] = lookaheadBuilder.addResolutionRule(set, prevRule);
				return;
			}
			builder.addReduce((Terminal) sym[termSym], ConflictBuilder.CONFLICT, wrules[rule],
					wrules[prevRule]);
		}
	}

	/**
	 * Runs the whole pipeline: checks that input symbols and the end-of-input symbol are
	 * defined, verifies the grammar, builds the LR0 automaton and the LALR lookaheads,
	 * computes the action tables and packs everything into the result.
	 *
	 * @return the generated tables, or {@code null} when a precondition fails (an error is
	 *         reported in that case) or when {@code buildLR0()} returns {@code false}
	 */
	private ParserTables generate() {
		boolean hasInputs = inputs != null && inputs.length > 0;
		if (!hasInputs) {
			status.report(ProcessingStatus.KIND_ERROR, "input symbol is not defined");
			return null;
		}

		for (int n = 0; n < inputs.length; n++) {
			if (inputs[n] == -1) {
				// FIXME: same message as for a missing inputs list
				status.report(ProcessingStatus.KIND_ERROR, "input symbol is not defined");
				return null;
			}
		}

		if (eoi == -1) {
			status.report(ProcessingStatus.KIND_ERROR, "the end-of-input symbol is not defined");
			return null;
		}

		// grammar
		verify_grammar();

		// engine
		if (buildLR0()) {
			lookaheadBuilder = new ExplicitLookaheadBuilder(rules, status);
			buildLalr();
			action();
			return createResult();
		}
		return null;
	}

	/**
	 * Packs the computed data into a {@link ParserTables} instance.
	 * <p>
	 * Action values at the odd positions of {@code action_table} that are not below the
	 * number of grammar rules are replaced by the indices {@code lookaheadBuilder}
	 * assigned to them. The rule arrays are then extended by one zero-length entry per
	 * resolution rule, whose left side is the index of the rule's default target.
	 *
	 * @return the assembled parser tables
	 */
	private ParserTables createResult() {
		final int grammarRules = this.rules;

		// Compacting resolution rules.
		lookaheadBuilder.assignIndices();
		for (int slot = 1; slot < action_table.length; slot += 2) {
			int act = action_table[slot];
			if (act >= grammarRules) {
				action_table[slot] = lookaheadBuilder.getRuleIndex(act);
			}
		}
		lookaheadBuilder.compact();
		LookaheadRule[] resolutionRules = lookaheadBuilder.extractRules();
		final int totalRules = grammarRules + resolutionRules.length;

		// Length of every grammar rule: distance to the negative terminator in rright.
		int[] ruleLengths = new int[totalRules];
		for (int rule = 0; rule < grammarRules; rule++) {
			int start = rindex[rule];
			int end = start;
			while (rright[end] >= 0) {
				end++;
			}
			ruleLengths[rule] = end - start;
		}

		// Resolution rules follow the grammar rules and have an empty right side.
		int[] leftSides = Arrays.copyOf(this.rleft, totalRules);
		for (int k = 0; k < resolutionRules.length; k++) {
			int target = grammarRules + k;
			leftSides[target] = resolutionRules[k].getDefaultTarget().getIndex();
			ruleLengths[target] = 0;
		}

		// NOTE(review): offsets are doubled presumably because term_from/term_to are
		// interleaved into one array of pairs below - confirm against ParserTables.
		int[] doubledGoto = new int[term_goto.length];
		for (int g = 0; g < doubledGoto.length; g++) {
			doubledGoto[g] = term_goto[g] * 2;
		}

		return new ParserTables(sym,
				totalRules, nsyms, nterms, nstates,
				leftSides, ruleLengths,
				doubledGoto, interleave(term_from, term_to),
				action_table, action_index, final_states,
				markers, resolutionRules);
	}

	/**
	 * Generates parser tables for a grammar.
	 *
	 * @param g the grammar to process
	 * @param status receives the warnings and errors reported during generation
	 * @return the generated parser data, or {@code null} when generation fails
	 */
	public static ParserData compile(Grammar g, ProcessingStatus status) {
		return new Builder(g, status).generate();
	}

	/**
	 * Merges two arrays of equal length into one array of pairs:
	 * {@code [arr1[0], arr2[0], arr1[1], arr2[1], ...]}.
	 *
	 * @param arr1 values for the even positions of the result
	 * @param arr2 values for the odd positions of the result
	 * @return a new array of length {@code 2 * arr1.length}
	 * @throws IllegalArgumentException if either array is {@code null} or the lengths differ
	 */
	private static int[] interleave(int[] arr1, int[] arr2) {
		// Null and length mismatch are reported separately so the message names the
		// actual problem.
		if (arr1 == null || arr2 == null) {
			throw new IllegalArgumentException("cannot interleave null arrays");
		}
		if (arr1.length != arr2.length) {
			throw new IllegalArgumentException("cannot interleave arrays of different length: "
					+ arr1.length + " != " + arr2.length);
		}
		int[] result = new int[arr1.length * 2];
		for (int i = 0; i < arr1.length; i++) {
			result[2 * i] = arr1[i];
			result[2 * i + 1] = arr2[i];
		}
		return result;
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy