![JAR search and dependency download from the Maven repository](/logo.png)
pingbu.nlp.Grammar Maven / Gradle / Ivy
package pingbu.nlp;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import pingbu.logger.Logger;
/**
 * Grammar tree module: searches input text against a grammar tree and its lexicons.
 *
 * @author pingbu
 */
public final class Grammar {
// Tag passed to Logger.d for every message this class emits.
private static final String TAG = Grammar.class.getSimpleName();
// When true, searchLexicons() runs each lexicon search on its own thread; otherwise the searches run inline.
private static final boolean MT = false;
// Gates the messages written through log().
private static final boolean LOG = false;
// Gates the messages written through log_result().
private static final boolean LOG_RESULT = false;
/**
 * Forwards a debug message to {@link Logger} when {@code LOG} is enabled;
 * does nothing otherwise.
 *
 * @param fmt  message format handed to the logger
 * @param args arguments handed to the logger together with the format
 */
private static void log(final String fmt, final Object... args) {
    if (!LOG) {
        return;
    }
    Logger.d(TAG, fmt, args);
}
/**
 * Forwards a result/timing message to {@link Logger} when {@code LOG_RESULT}
 * is enabled; does nothing otherwise.
 *
 * @param fmt  message format handed to the logger
 * @param args arguments handed to the logger together with the format
 */
private static void log_result(final String fmt, final Object... args) {
    if (!LOG_RESULT) {
        return;
    }
    Logger.d(TAG, fmt, args);
}
/**
 * Mutable holder describing a named slot recorded while the grammar tree is navigated.
 */
protected static final class ItemSlot {
    /** Name under which the slot text is stored in the search result. */
    public String name;
    /** Index of the slot's first unit within the matched path. */
    int pos;
    /** Number of consecutive path units covered by the slot. */
    int length;
}
/**
 * Mutable key/value pair that a grammar search copies into its result parameters.
 */
protected static final class ItemParam {
    /** Parameter name. */
    public String key;
    /** Parameter value. */
    public String value;
}
/** Grammar tree that every search navigates. */
private final Subtree mGrammarTree;
/** Lexicons searched against the input text; a private copy of the constructor argument. */
private final List<Lexicon> mLexicons;

/**
 * Creates a grammar over the given tree and lexicons.
 *
 * @param tree     grammar tree to navigate during a search
 * @param lexicons lexicons to search; copied, so later changes to the argument are not seen
 */
protected Grammar(final Subtree tree, final Collection<? extends Lexicon> lexicons) {
    mGrammarTree = tree;
    // Explicit type argument keeps this compiling under pre-Java-8 inference rules.
    mLexicons = new ArrayList<Lexicon>(lexicons);
}
/**
 * Result of a grammar search.
 */
public static final class SearchResult {
    /**
     * Parameters produced by the search, keyed by parameter name.
     */
    public final Map<String, String> params;
    /**
     * Score of the match; a full score is 1.0.
     */
    public final double score;
    /**
     * Time spent on the search, in seconds.
     */
    public final double time;

    /**
     * @param params parameters produced by the search; stored by reference, not copied
     * @param score  score of the match
     * @param time   elapsed search time in seconds
     */
    private SearchResult(final Map<String, String> params, final double score, final double time) {
        this.params = params;
        this.score = score;
        this.time = time;
    }
}
private final class SearchContext {
// Wall-clock time (ms) captured when this context is created; searchGrammar() reports the total elapsed time against it.
private final long mTime0 = System.currentTimeMillis();
// Hits of each lexicon, indexed like mLexicons; an entry stays null when that lexicon returned no hits.
private final Lexicon.SearchResult[][] mLexiconResults = new Lexicon.SearchResult[mLexicons.size()][];
// Text being searched.
private final String mText;
// One list per character position of the text, holding the unit results that start there; filled by _initSourceMatrix().
private final LexiconSearchResultList[] mPosLexiconSearchResults;
/**
 * Creates the state for one search of the given text.
 *
 * @param text text to search; its length sizes the per-position result table
 */
SearchContext(final String text) {
mText = text;
mPosLexiconSearchResults = new LexiconSearchResultList[text.length()];
}
private final class LexiconSearchProc implements Runnable {
private final int mLexiconIndex;
Collection results;
LexiconSearchProc(int lexiconIndex) {
mLexiconIndex = lexiconIndex;
}
@Override
public final void run() {
results = mLexicons.get(mLexiconIndex).search(mText);
}
}
/**
 * Searches every lexicon for the text and stores each non-empty hit list in
 * {@code mLexiconResults}, at the same index the lexicon has in {@code mLexicons}.
 * When {@code MT} is set, each lexicon is searched on its own thread and the
 * threads are joined before their results are read; otherwise the searches
 * run one after another on the calling thread.
 *
 * @throws RuntimeException if the calling thread is interrupted while waiting
 *                          for a search thread (only possible when {@code MT} is set)
 */
final void searchLexicons() {
    // mLexicons is always assigned a fresh list by the Grammar constructor, so only emptiness matters.
    if (mLexicons.isEmpty()) {
        return;
    }
    final long t0 = System.currentTimeMillis();
    final int count = mLexicons.size();
    final LexiconSearchProc[] procs = new LexiconSearchProc[count];
    final Thread[] threads = MT ? new Thread[count] : null;

    // Start (or directly run) every search before collecting any result.
    for (int i = 0; i < count; ++i) {
        procs[i] = new LexiconSearchProc(i);
        if (MT) {
            threads[i] = new Thread(procs[i]);
            threads[i].start();
        } else {
            procs[i].run();
        }
    }

    int lexicons = 0;
    for (int i = 0; i < count; ++i) {
        if (MT) {
            try {
                threads[i].join();
            } catch (InterruptedException e) {
                // Keep the interrupt visible to callers instead of swallowing it.
                Thread.currentThread().interrupt();
                throw new RuntimeException(e);
            }
        }
        final Collection<? extends Lexicon.SearchResult> rs = procs[i].results;
        if (rs != null && !rs.isEmpty()) {
            mLexiconResults[i] = rs.toArray(new Lexicon.SearchResult[rs.size()]);
            ++lexicons;
        }
    }
    final long t = System.currentTimeMillis();
    log_result(" %d lexicons search finish, %.3fs time used", lexicons, (t - t0) / 1000.);
}
/**
 * Fills {@code mPosLexiconSearchResults}: every text position first gets a
 * length-1 entry for the character found there, then each lexicon hit is
 * appended to the list of the position where the hit starts.
 */
private void _initSourceMatrix() {
    // Pass 1: one single-character entry per position.
    final int positions = mPosLexiconSearchResults.length;
    for (int i = 0; i < positions; ++i) {
        final LexiconSearchResultList cell = new LexiconSearchResultList();
        mPosLexiconSearchResults[i] = cell;
        final LexiconSearchResult charEntry = new LexiconSearchResult();
        charEntry.unitResult = new UnitCharResult(mText.charAt(i));
        charEntry.length = 1;
        cell.add(charEntry);
    }

    // Pass 2: lexicon hits, filed under their start position.
    for (int index = 0; index < mLexiconResults.length; ++index) {
        final Lexicon.SearchResult[] hits = mLexiconResults[index];
        if (hits == null) {
            continue; // this lexicon produced no hits
        }
        for (final Lexicon.SearchResult hit : hits) {
            final LexiconSearchResult entry = new LexiconSearchResult();
            entry.unitResult = new UnitLexiconSlotResult(mLexicons.get(index), mText, hit);
            entry.length = hit.length;
            mPosLexiconSearchResults[hit.pos].add(entry);
        }
    }
}
/** Last node of the best-scoring path found so far, or null when no path has been accepted. */
private SearchNode mBestPath = null;
/** Navigator depth (number of pushed units) at the time the best path was recorded. */
private int mBestPathDepth = 0;
/** Grammar parameters that were active when the best path was recorded. */
private final List<ItemParam> mBestPathParams = new ArrayList<ItemParam>();
/** Slots that were active when the best path was recorded. */
private final List<ItemSlot> mBestPathSlots = new ArrayList<ItemSlot>();
/** Score of the best path; a candidate must score strictly higher to replace it. */
private double mBestPathScore = 0;
private final class SearchNavigator implements Subtree.Cursor.Navigator {
private int mDepth = 0;
private SearchNodes mNodes = new SearchNodes();
private final ArrayList> mParamss = new ArrayList<>();
private final ArrayList mSlots = new ArrayList<>();
SearchNavigator() {
for (int pos = 0; pos < mText.length(); ++pos) {
final SearchNode node = new SearchNode();
node.pos = pos;
mNodes.nodes.add(node);
}
}
@Override
public boolean extendLexicon() {
return false;
}
@Override
public boolean pushUnit(final Unit unit) {
final SearchNodes nextNodes = new SearchNodes();
for (final SearchNode node : mNodes.nodes)
if (node.pos < mPosLexiconSearchResults.length)
for (LexiconSearchResult r : mPosLexiconSearchResults[node.pos]) {
final SearchNode nextNode = new SearchNode();
nextNode.unitScore = r.unitResult.compare(unit);
if (nextNode.unitScore > 0) {
nextNode.score = nextNode.unitScore * r.unitResult.getInnerScore();
if (node.unitResult != null)
nextNode.score += node.score + node.unitScore * nextNode.unitScore;
nextNode.length = node.length + r.length;
nextNode.pos = node.pos + r.length;
nextNode.unitResult = r.unitResult;
nextNode.prev = node;
nextNodes.nodes.add(nextNode);
}
}
if (nextNodes.nodes.isEmpty())
return false;
nextNodes.prev = mNodes;
mNodes = nextNodes;
++mDepth;
return true;
}
@Override
public void popUnit() {
--mDepth;
mNodes = mNodes.prev;
}
@Override
public void pushParams(final Collection params) {
mParamss.add(params);
}
@Override
public void popParams(final Collection params) {
mParamss.remove(params);
}
@Override
public Object beginSlot() {
return (Integer) mDepth;
}
@Override
public void pushSlot(final String name, final Object beginPos) {
final ItemSlot slot = new ItemSlot();
slot.name = name;
slot.pos = (Integer) beginPos;
slot.length = mDepth - slot.pos;
mSlots.add(slot);
}
@Override
public void popSlot() {
mSlots.remove(mSlots.size() - 1);
}
@Override
public void endOnePath() {
for (SearchNode node : mNodes.nodes) {
double score = node.score / (Math.max(mText.length(), node.length) - 1);
if (score > mBestPathScore) {
mBestPathScore = score;
mBestPathDepth = mDepth;
mBestPath = node;
mBestPathParams.clear();
for (final Collection params : mParamss)
mBestPathParams.addAll(params);
mBestPathSlots.clear();
mBestPathSlots.addAll(mSlots);
}
}
}
}
/**
 * Writes the best path to the debug log: first its score, then one line per
 * unit with the unit's id and text.
 *
 * @param nodes nodes of the best path, in path order
 */
private void _logBestPath(SearchNode[] nodes) {
    log("Best path, score=" + mBestPathScore);
    int index = 0;
    for (final SearchNode step : nodes) {
        log(" unit[%d] %s - %s", index, step.unitResult.getId(), step.unitResult.getText());
        ++index;
    }
}
/**
 * Navigates the grammar tree over the text and builds the search result from
 * the best-scoring path. The result parameters are assembled in this order,
 * later entries overwriting earlier ones with the same key:
 * the text of each recorded slot, the grammar parameters, then the item
 * parameters of every lexicon unit on the path (a value of {@code "<0>"}
 * stands for the unit's own text). Values of the form {@code <name>} are then
 * replaced by the value stored under {@code name}, and finally keys starting
 * with {@code "$"} or {@code "Digit:"} are dropped.
 *
 * @return the search result; its parameter map is empty and its score 0 when
 *         no path was accepted
 * @throws RuntimeException if a {@code <name>} reference names a missing slot
 *                          or the references form a cycle
 */
final SearchResult searchGrammar() {
    final long t0 = System.currentTimeMillis();
    _initSourceMatrix();
    mGrammarTree.newCursor(null).navigate(new SearchNavigator());

    // Walk the best path backwards, filling the array from its end.
    final SearchNode[] nodes = new SearchNode[mBestPathDepth];
    for (SearchNode n = mBestPath; n != null; n = n.prev) {
        if (n.unitResult != null) {
            nodes[--mBestPathDepth] = n;
        }
    }
    _logBestPath(nodes);

    final Map<String, String> slots = new HashMap<String, String>();

    // Slot text: concatenation of the unit texts the slot covers.
    for (final ItemSlot slotInfo : mBestPathSlots) {
        final StringBuilder sb = new StringBuilder();
        for (int i = 0; i < slotInfo.length; ++i) {
            final Unit.Result unitResult = nodes[slotInfo.pos + i].unitResult;
            if (unitResult != null) {
                final String unitText = unitResult.getText();
                if (unitText != null) {
                    sb.append(unitText);
                }
            }
        }
        if (sb.length() > 0) {
            final String v = sb.toString();
            slots.put(slotInfo.name, v);
            log(" grammar slot [%d,%d] %s = %s", slotInfo.pos, slotInfo.length, slotInfo.name, v);
        }
    }

    // Parameters attached to the grammar path.
    for (final ItemParam param : mBestPathParams) {
        slots.put(param.key, param.value);
        log(" grammar param %s = %s", param.key, param.value);
    }

    // Parameters attached to the lexicon items matched on the path.
    for (final SearchNode node : nodes) {
        final Unit.Result unitResult = node.unitResult;
        if (!(unitResult instanceof UnitLexiconSlotResult)) {
            continue; // also skips null
        }
        final String unitText = unitResult.getText();
        if (unitText == null) {
            continue;
        }
        final Lexicon lexicon = ((UnitLexiconSlotResult) unitResult).mLexicon;
        final int id = lexicon.findItem(unitText);
        if (id >= 0) {
            for (final ItemParam param : lexicon.getItemParams(id)) {
                // "<0>" is a placeholder for the matched text itself.
                final String v = "<0>".equals(param.value) ? unitText : param.value;
                slots.put(param.key, v);
                log(" lexicon item param %s = %s", param.key, v);
            }
        }
    }

    _resolveSlotReferences(slots);

    // Drop internal entries from the result.
    final Set<String> toRemoveSlots = new HashSet<String>();
    for (final String slot : slots.keySet()) {
        if (slot.startsWith("$") || slot.startsWith("Digit:")) {
            toRemoveSlots.add(slot);
        }
    }
    for (final String slot : toRemoveSlots) {
        slots.remove(slot);
    }

    final long t = System.currentTimeMillis();
    log_result(" tree search finish, %.3fs time used", (t - t0) / 1000.);
    return new SearchResult(slots, mBestPathScore, (t - mTime0) / 1000.);
}

/** Tells whether a slot value has the {@code <name>} reference form. */
private boolean _isSlotReference(final String value) {
    return value != null && value.startsWith("<") && value.endsWith(">");
}

/**
 * Replaces every {@code <name>} value in the map by the value stored under
 * {@code name}, repeating until no reference is left.
 *
 * @throws RuntimeException if a referenced slot is missing, or if a pass
 *                          resolves nothing while references remain (a cycle)
 */
private void _resolveSlotReferences(final Map<String, String> slots) {
    for (;;) {
        boolean pending = false;
        boolean progressed = false;
        for (final Map.Entry<String, String> slot : slots.entrySet()) {
            final String v = slot.getValue();
            if (!_isSlotReference(v)) {
                continue;
            }
            final String v1 = slots.get(v.substring(1, v.length() - 1));
            if (v1 == null) {
                throw new RuntimeException("slot " + v + " not found");
            }
            if (_isSlotReference(v1)) {
                pending = true; // target is itself unresolved; retry on the next pass
            } else {
                slot.setValue(v1);
                progressed = true;
            }
        }
        if (!pending) {
            return;
        }
        if (!progressed) {
            // Nothing changed, so another pass would be identical: the references loop.
            throw new RuntimeException("slot references form a cycle");
        }
    }
}
}
/**
 * One step of a candidate path through the text. Fields start at their Java
 * defaults ({@code null} / 0).
 */
private static final class SearchNode {
    /** Preceding node on the path, or null for a starting node. */
    SearchNode prev;
    /** Text position at which the next unit would start. */
    int pos;
    /** Sum of the lengths of the unit results on the path up to this node. */
    int length;
    /** Unit result matched by this step, or null for a starting node. */
    Unit.Result unitResult;
    /** Comparison score of this step's unit. */
    double unitScore;
    /** Score accumulated along the path up to this node. */
    double score;
}
private static final class SearchNodes {
SearchNodes prev = null;
final List nodes = new ArrayList<>();
}
/**
 * A unit result that can be matched at some text position, together with the
 * length it adds to a path that uses it.
 */
private static final class LexiconSearchResult {
// Result object compared against grammar units during the search.
Unit.Result unitResult;
// Amount by which a path's position and length advance when this result is used.
int length;
}
private static final class LexiconSearchResultList extends ArrayList {
private static final long serialVersionUID = 1L;
}
/**
 * Searches the grammar for the given text: the lexicons are searched first,
 * then the grammar tree is navigated over the text and the lexicon hits.
 *
 * @param text input text to search
 * @return the search result
 */
public final SearchResult search(final String text) {
    log_result("*** Searching for %s:", text);
    final SearchContext searchContext = new SearchContext(text);
    searchContext.searchLexicons(); // search the lexicons first
    final SearchResult rr = searchContext.searchGrammar(); // then search the grammar tree
    if (rr != null) {
        log_result("RESULT: %f", rr.score);
        // Wildcard view: logging only needs the entries as objects.
        final Map<?, ?> params = rr.params;
        for (final Map.Entry<?, ?> param : params.entrySet()) {
            log_result(" <%s>=%s", param.getKey(), param.getValue());
        }
    } else {
        log_result("NO RESULT!");
    }
    return rr;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy