All downloads are FREE. The search and download functionality uses the official Maven repository.
Please wait. This can take a few minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price: only $1
You can buy this project and download or modify it as often as you want.
com.github.liuyehcf.framework.compile.engine.cfg.AbstractCfgCompiler Maven / Gradle / Ivy
package com.github.liuyehcf.framework.compile.engine.cfg;
import com.github.liuyehcf.framework.compile.engine.CompileResult;
import com.github.liuyehcf.framework.compile.engine.cfg.lexical.LexicalAnalyzer;
import com.github.liuyehcf.framework.compile.engine.grammar.converter.GrammarConverterPipeline;
import com.github.liuyehcf.framework.compile.engine.grammar.definition.*;
import com.github.liuyehcf.framework.compile.engine.utils.Assert;
import com.github.liuyehcf.framework.compile.engine.utils.SetUtils;
import java.io.Serializable;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
/**
 * Abstract base class for context-free grammar (CFG) compilers.
 *
 * <p>The constructor converts the original grammar through the supplied converter
 * pipeline and then computes the FIRST and FOLLOW sets of the converted grammar.
 * Subclasses implement {@link #doCompile(String)}; {@link #compile(String)} throws
 * until the legality flag has been switched on through {@link #setLegal(boolean)}.
 *
 * @author hechenfeng
 * @date 2018/04/16
 */
public abstract class AbstractCfgCompiler implements CfgCompiler, Serializable {
/**
* 词法分析器
*/
protected final LexicalAnalyzer lexicalAnalyzer;
/**
* 原始文法
*/
private final Grammar originalGrammar;
/**
* 文法转换pipeline
*/
private final GrammarConverterPipeline grammarConverterPipeline;
/**
* 转换后的文法
*/
protected Grammar grammar;
/**
* 非终结符->产生式的映射
*/
private Map productionMap;
/**
* first集
*/
private Map> firsts;
/**
* follow集
*/
private Map> follows;
private boolean isLegal;
/**
 * Stores the collaborators and eagerly prepares the grammar (conversion, FIRST sets,
 * FOLLOW sets) by calling {@code init()}.
 *
 * @param originalGrammar          grammar to compile against; must not be {@code null}
 * @param lexicalAnalyzer          lexical analyzer made available to subclasses; must not be {@code null}
 * @param grammarConverterPipeline pipeline applied to {@code originalGrammar}; must not be
 *                                 {@code null} because it is dereferenced during initialization
 * @throws NullPointerException if any argument is {@code null}
 */
protected AbstractCfgCompiler(Grammar originalGrammar, LexicalAnalyzer lexicalAnalyzer,
                              GrammarConverterPipeline grammarConverterPipeline) {
    if (originalGrammar == null) {
        throw new NullPointerException("originalGrammar");
    }
    if (lexicalAnalyzer == null) {
        throw new NullPointerException("lexicalAnalyzer");
    }
    // Previously unchecked: a null pipeline only failed later, inside convertGrammar(),
    // with an anonymous NullPointerException. Fail here with a named one instead.
    if (grammarConverterPipeline == null) {
        throw new NullPointerException("grammarConverterPipeline");
    }
    this.originalGrammar = originalGrammar;
    this.lexicalAnalyzer = lexicalAnalyzer;
    this.grammarConverterPipeline = grammarConverterPipeline;
    init();
}
/**
 * Returns the mapping from each non-terminal to its production.
 *
 * <p>The internal map itself is returned, not a copy.
 *
 * @return the non-terminal to production map
 */
protected Map<Symbol, Production> getProductionMap() {
    return productionMap;
}
/**
 * Looks up the FOLLOW set recorded for the given symbol.
 *
 * <p>The internal set itself is returned, not a copy.
 *
 * @param symbol symbol whose FOLLOW set is requested
 * @return the FOLLOW set, or {@code null} when no set is recorded for {@code symbol}
 */
protected Set<Symbol> getFollowsOf(Symbol symbol) {
    return follows.get(symbol);
}
/**
 * Compiles the given input by delegating to {@link #doCompile(String)}.
 *
 * @param input text to compile
 * @return the result produced by {@code doCompile}
 * @throws RuntimeException if the legality flag is not set
 */
@Override
public final CompileResult compile(String input) {
    if (isLegal) {
        return doCompile(input);
    }
    throw new RuntimeException(this.getClass().getSimpleName() + " can't support this Grammar");
}
/**
 * Performs the actual compilation; implemented by subclasses.
 *
 * <p>Called by {@code compile(String)} after its legality-flag check has passed.
 *
 * @param input the input to be matched
 * @return the compile result
 */
protected abstract CompileResult doCompile(String input);
/**
 * Returns the converted grammar (the output of the converter pipeline).
 *
 * @return the grammar this compiler works on
 */
@Override
public final Grammar getGrammar() {
    return this.grammar;
}
/**
 * Builds all derived state. The order matters: the FIRST computation reads the converted
 * grammar and the production map, and the FOLLOW computation reads the FIRST sets.
 */
private void init() {
/*
 * Convert the given grammar. Per the original author's note this includes eliminating
 * direct/indirect left recursion and extracting common factors; what actually happens
 * depends on the pipeline passed to the constructor.
 */
convertGrammar();
/*
 * Compute the FIRST sets.
 */
calculateFirst();
/*
 * Compute the FOLLOW sets.
 */
calculateFollow();
}
/**
 * Runs the original grammar through the converter pipeline and indexes the resulting
 * productions by their left-hand symbol. Each left-hand symbol is asserted to appear
 * on at most one production.
 */
private void convertGrammar() {
    grammar = grammarConverterPipeline.convert(originalGrammar);
    productionMap = new HashMap<>(16);
    for (Production production : grammar.getProductions()) {
        Symbol left = production.getLeft();
        // A second production with the same left-hand side would silently overwrite the first.
        Assert.assertFalse(productionMap.containsKey(left));
        productionMap.put(left, production);
    }
}
/**
 * Computes the FIRST set of every grammar symbol by fixed-point iteration.
 *
 * <p>Terminals are seeded with themselves. Non-terminals are then processed in repeated
 * passes over a working copy of the sets; the iteration stops when a pass leaves the
 * copy equal to the current sets.
 */
private void calculateFirst() {
    firsts = new HashMap<>(16);

    /*
     * First, handle all terminals: FIRST(t) = {t}.
     */
    for (Symbol terminator : this.grammar.getTerminators()) {
        firsts.put(terminator, SetUtils.of(terminator));
    }

    /*
     * Then handle the non-terminals until nothing changes any more.
     */
    boolean changed = true;
    while (changed) {
        Map<Symbol, Set<Symbol>> newFirsts = copyFirst();

        for (Symbol x : this.grammar.getNonTerminators()) {
            Production px = productionMap.get(x);
            Assert.assertNotNull(px);

            /*
             * If X is a non-terminal and X -> Y1...Yk is a production (k >= 1):
             * when, for some i, a is in FIRST(Yi) and epsilon is in every one of
             * FIRST(Y1), ..., FIRST(Yi-1) (i.e. Y1...Yi-1 derives epsilon), add a to FIRST(X).
             * When epsilon is in FIRST(Yj) for all j = 1..k, add epsilon to FIRST(X).
             */
            for (PrimaryProduction ppx : px.getPrimaryProductions()) {
                boolean canReachEpsilon = true;

                for (Symbol yi : ppx.getRight().getSymbols()) {
                    Set<Symbol> firstsOfYi = newFirsts.get(yi);
                    if (firstsOfYi == null) {
                        /*
                         * FIRST(Yi) has not been computed yet, so skip the rest of this
                         * alternative; a later pass will pick it up.
                         */
                        canReachEpsilon = false;
                        break;
                    }

                    /*
                     * Add FIRST(Yi) without epsilon to FIRST(X).
                     */
                    newFirsts.computeIfAbsent(x, key -> new HashSet<>())
                            .addAll(SetUtils.extract(firstsOfYi, Symbol.EPSILON));

                    /*
                     * If FIRST(Yi) does not contain epsilon, later symbols cannot contribute.
                     */
                    if (!firstsOfYi.contains(Symbol.EPSILON)) {
                        canReachEpsilon = false;
                        break;
                    }
                }

                if (canReachEpsilon) {
                    // An alternative with no right-hand symbols reaches this point without
                    // ever creating the entry for x, so create it on demand instead of
                    // dereferencing a possibly missing set.
                    newFirsts.computeIfAbsent(x, key -> new HashSet<>()).add(Symbol.EPSILON);
                }
            }
        }

        if (newFirsts.equals(this.firsts)) {
            changed = false;
        } else {
            this.firsts = newFirsts;
        }
    }
}
private Map> copyFirst() {
Map> copy = new HashMap<>(16);
for (Map.Entry> entry : firsts.entrySet()) {
copy.put(entry.getKey(), new HashSet<>(entry.getValue()));
}
return copy;
}
/**
 * Computes the FOLLOW set of every non-terminal by fixed-point iteration.
 *
 * <p>FOLLOW(start) is seeded with the end marker. Passes over a working copy are repeated
 * until a pass leaves the copy equal to the current sets. Finally every non-terminal is
 * asserted to have a non-empty FOLLOW set.
 */
private void calculateFollow() {
    follows = new HashMap<>(16);

    /*
     * Put $ into FOLLOW(S), where S is the start symbol and $ is the end-of-input marker.
     */
    follows.put(this.grammar.getStart(), SetUtils.of(Symbol.DOLLAR));

    boolean changed = true;
    while (changed) {
        Map<Symbol, Set<Symbol>> newFollows = copyFollow();

        for (Symbol a : this.grammar.getNonTerminators()) {
            Production pa = productionMap.get(a);
            Assert.assertNotNull(pa);

            for (PrimaryProduction ppa : pa.getPrimaryProductions()) {
                int size = ppa.getRight().getSymbols().size();

                for (int i = 0; i < size; i++) {
                    Symbol b = ppa.getRight().getSymbols().get(i);
                    if (b.isTerminator()) {
                        continue;
                    }

                    // beta is whatever follows B in this alternative; null when B is last.
                    SymbolString beta = null;
                    if (i < size - 1) {
                        beta = ppa.getRight().getSubSymbolString(i + 1);
                    }

                    /*
                     * For a production A -> alpha B beta, everything in FIRST(beta)
                     * except epsilon is in FOLLOW(B).
                     */
                    if (beta != null) {
                        Set<Symbol> followsOfB = newFollows.computeIfAbsent(b, key -> new HashSet<>());
                        Set<Symbol> firstsOfBeta = getFirstsOf(beta);
                        Assert.assertNotNull(firstsOfBeta);
                        followsOfB.addAll(SetUtils.extract(firstsOfBeta, Symbol.EPSILON));
                    }

                    /*
                     * For a production A -> alpha B, or A -> alpha B beta where FIRST(beta)
                     * contains epsilon, everything in FOLLOW(A) is in FOLLOW(B).
                     */
                    if (beta == null || epsilonInvolvedInFirstsOf(beta)) {
                        Set<Symbol> followsOfA = newFollows.get(a);
                        if (followsOfA != null) {
                            newFollows.computeIfAbsent(b, key -> new HashSet<>()).addAll(followsOfA);
                        }
                    }
                }
            }
        }

        if (newFollows.equals(this.follows)) {
            changed = false;
        } else {
            this.follows = newFollows;
        }
    }

    /*
     * Check that every non-terminal ended up with a FOLLOW set.
     */
    for (Symbol nonTerminator : this.grammar.getNonTerminators()) {
        Set<Symbol> followsOfNonTerminator = follows.get(nonTerminator);
        Assert.assertFalse(followsOfNonTerminator == null
                || followsOfNonTerminator.isEmpty());
    }
}
private Map> copyFollow() {
Map> copy = new HashMap<>(16);
for (Map.Entry> entry : follows.entrySet()) {
copy.put(entry.getKey(), new HashSet<>(entry.getValue()));
}
return copy;
}
/**
 * Renders the FIRST sets as a JSON-like string containing both the terminal and the
 * non-terminal sections.
 *
 * @return the rendered FIRST sets
 */
@Override
public final String getFirstJSONString() {
    final boolean withTerminatorSection = true;
    return getJSONStringFor(firsts, withTerminatorSection);
}
/**
 * Renders the FOLLOW sets as a JSON-like string containing only the non-terminal section.
 *
 * @return the rendered FOLLOW sets
 */
@Override
public final String getFollowJSONString() {
    final boolean withTerminatorSection = false;
    return getJSONStringFor(follows, withTerminatorSection);
}
private String getJSONStringFor(Map> map, boolean containsTerminator) {
StringBuilder sb = new StringBuilder();
sb.append('{');
if (containsTerminator) {
sb.append("\"terminator\":");
sb.append('{');
appendAttr(sb, map, this.grammar.getTerminators());
Assert.assertFalse(this.grammar.getTerminators().isEmpty());
sb.setLength(sb.length() - 1);
sb.append('}');
}
if (containsTerminator) {
sb.append(',');
}
sb.append("\"nonTerminator\":");
sb.append('{');
appendAttr(sb, map, this.grammar.getNonTerminators());
Assert.assertFalse(this.grammar.getNonTerminators().isEmpty());
sb.setLength(sb.length() - 1);
sb.append('}');
sb.append('}');
return sb.toString();
}
private void appendAttr(StringBuilder sb, Map> map, Set symbols) {
for (Symbol symbol : symbols) {
sb.append('\"').append(symbol).append("\":");
sb.append('\"');
Assert.assertFalse(map.get(symbol).isEmpty());
for (Symbol firstSymbol : map.get(symbol)) {
sb.append(firstSymbol).append(',');
}
sb.setLength(sb.length() - 1);
sb.append('\"');
sb.append(',');
}
}
/**
 * Returns the legality flag; {@code compile} throws while it is {@code false}.
 *
 * @return the current value of the legality flag
 */
@Override
public boolean isLegal() {
    return this.isLegal;
}
/**
 * Sets the legality flag that {@code compile} checks before delegating to {@code doCompile}.
 *
 * @param isLegal new value of the flag
 */
protected void setLegal(boolean isLegal) {
this.isLegal = isLegal;
}
/**
 * Tells whether the FIRST set of every symbol in the string contains epsilon.
 *
 * @param symbolString symbol string to inspect
 * @return {@code true} when every symbol's FIRST set contains epsilon (including the case
 *         of a string with no symbols); {@code false} as soon as one does not
 */
protected boolean epsilonInvolvedInFirstsOf(SymbolString symbolString) {
    for (Symbol current : symbolString.getSymbols()) {
        boolean hasEpsilon = this.firsts.get(current).contains(Symbol.EPSILON);
        if (hasEpsilon) {
            continue;
        }
        // One symbol without epsilon in its FIRST set is enough to decide.
        return false;
    }
    return true;
}
/**
 * Computes the FIRST set of a symbol string from the per-symbol FIRST sets.
 *
 * @param symbolString symbol string whose FIRST set is requested
 * @return a new set holding the result; it contains epsilon only when the FIRST set of
 *         every symbol in the string contains epsilon
 */
protected Set<Symbol> getFirstsOf(SymbolString symbolString) {
    Set<Symbol> firstList = new HashSet<>();

    for (Symbol symbol : symbolString.getSymbols()) {
        Set<Symbol> firstsOfSymbol = this.firsts.get(symbol);

        /*
         * Every non-epsilon member of FIRST(symbol) belongs to the result.
         */
        firstList.addAll(SetUtils.extract(firstsOfSymbol, Symbol.EPSILON));

        /*
         * If FIRST(symbol) does not contain epsilon, the computation ends here.
         */
        if (!firstsOfSymbol.contains(Symbol.EPSILON)) {
            return firstList;
        }
    }

    /*
     * Every symbol's FIRST set contains epsilon, so the whole string's FIRST set does too.
     */
    firstList.add(Symbol.EPSILON);
    return firstList;
}
}