com.ibm.icu.text.RBBITableBuilder Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of icu4j Show documentation
Show all versions of icu4j Show documentation
International Components for Unicode for Java (ICU4J) is a mature, widely used Java library
providing Unicode and Globalization support
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html#License
/*
**********************************************************************
* Copyright (c) 2002-2016, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*/
package com.ibm.icu.text;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import com.ibm.icu.impl.Assert;
import com.ibm.icu.impl.RBBIDataWrapper;
import com.ibm.icu.lang.UCharacter;
import com.ibm.icu.lang.UProperty;
import com.ibm.icu.text.RBBIRuleBuilder.IntPair;
/**
* This class is part of the RBBI rule compiler.
* It builds the state transition table used by the RBBI runtime
* from the expression syntax tree generated by the rule scanner.
*
* This class is part of the RBBI implementation only.
* There is no user-visible public API here.
*/
class RBBITableBuilder {
//
// RBBIStateDescriptor - The DFA is initially constructed as a set of these descriptors,
// one for each state.
static class RBBIStateDescriptor {
boolean fMarked;
int fAccepting;
int fLookAhead;
SortedSet fTagVals;
int fTagsIdx;
Set fPositions; // Set of parse tree positions associated
// with this state. Unordered (it's a set).
// UVector contents are RBBINode *
int[] fDtran; // Transitions out of this state.
// indexed by input character
// contents is int index of dest state
// in RBBITableBuilder.fDStates
RBBIStateDescriptor(int maxInputSymbol) {
fTagVals = new TreeSet();
fPositions = new HashSet();
fDtran = new int[maxInputSymbol+1]; // fDtran needs to be pre-sized.
// It is indexed by input symbols, and will
// hold the next state number for each
// symbol.
}
}
/** The rule builder that owns the parse trees, set builder and shared status tables. */
private RBBIRuleBuilder fRB;

/** The array index into RBBIRuleBuilder.fTreeRoots for the parse tree to operate on. */
private int fRootIx;

/** D states (Aho's terminology). Index is state number. */
private List<RBBIStateDescriptor> fDStates;

/** Synthesized safe table, a List of row arrays (one short[] of next-state values per state). */
private List<short[]> fSafeTable;

//-----------------------------------------------------------------------------
//
//  Constructor for RBBITableBuilder.
//
//      rb          the overall rule builder that this table builder works for.
//      rootNodeIx  an index into the array of root nodes that is held by
//                  the overall RBBIRuleBuilder.
//
//  The list of DFA states starts out empty; the safe table is left unset
//  until it is built.
//-----------------------------------------------------------------------------
RBBITableBuilder(RBBIRuleBuilder rb, int rootNodeIx) {
    fRootIx = rootNodeIx;
    fRB = rb;
    fDStates = new ArrayList<RBBIStateDescriptor>();
}
//-----------------------------------------------------------------------------
//
//  RBBITableBuilder::buildForwardTable - This is the main function for building
//                                        the DFA state transition table from the
//                                        RBBI rules parse tree.
//
//  The parse tree at fRB.fTreeRoots[fRootIx] is rewritten in place (variables
//  and sets flattened, {bof} and end-marker nodes added) and the slot in
//  fTreeRoots is updated every time the root changes.
//
//-----------------------------------------------------------------------------
void buildForwardTable() {
    RBBINode root = fRB.fTreeRoots[fRootIx];

    // If there were no rules, just return. This situation can easily arise
    // for the reverse rules.
    if (root == null) {
        return;
    }

    //
    // Walk through the tree, replacing any references to $variables with a copy of the
    // parse tree for the substitution expression.
    //
    root = root.flattenVariables();
    fRB.fTreeRoots[fRootIx] = root;
    if (fRB.fDebugEnv != null && fRB.fDebugEnv.indexOf("ftree") >= 0) {
        System.out.println("Parse tree after flattening variable references.");
        root.printTree(true);
    }

    //
    // If the rules contained any references to {bof}, add a {bof} leaf in front of
    // the tree. This means that all matches must start out with the {bof} fake character.
    //
    if (fRB.fSetBuilder.sawBOF()) {
        RBBINode bofTop = new RBBINode(RBBINode.opCat);
        RBBINode bofLeaf = new RBBINode(RBBINode.leafChar);
        bofLeaf.fVal = 2;      // Reserved value for {bof}.
        bofLeaf.fParent = bofTop;
        bofTop.fLeftChild = bofLeaf;
        // NOTE(review): unlike the end-marker wrap below, the previous root's fParent
        // is not pointed at the new cat node here.
        bofTop.fRightChild = root;
        root = bofTop;
        fRB.fTreeRoots[fRootIx] = root;
    }

    //
    // Add a unique right-end marker to the expression.
    // Appears as a cat-node, left child being the original tree,
    // right child being the end marker.
    //
    RBBINode withEndMark = new RBBINode(RBBINode.opCat);
    withEndMark.fLeftChild = root;
    root.fParent = withEndMark;
    RBBINode endMarker = new RBBINode(RBBINode.endMark);
    endMarker.fParent = withEndMark;
    withEndMark.fRightChild = endMarker;
    root = withEndMark;
    fRB.fTreeRoots[fRootIx] = root;

    //
    // Replace all references to UnicodeSets with the tree for the equivalent
    // expression.
    //
    root.flattenSets();
    if (fRB.fDebugEnv != null && fRB.fDebugEnv.indexOf("stree") >= 0) {
        System.out.println("Parse tree after flattening Unicode Set references.");
        root.printTree(true);
    }

    //
    // Calculate the functions nullable, firstpos, lastpos and followpos on
    // nodes in the parse tree.
    // See the algorithm description in Aho.
    // Understanding how this works by looking at the code alone will be
    // nearly impossible.
    //
    calcNullable(root);
    calcFirstPos(root);
    calcLastPos(root);
    calcFollowPos(root);
    if (fRB.fDebugEnv != null && fRB.fDebugEnv.indexOf("pos") >= 0) {
        System.out.print("\n");
        printPosSets(root);
    }

    //
    // For "chained" rules, modify the followPos sets.
    //
    if (fRB.fChainRules) {
        calcChainedFollowPos(root);
    }

    //
    // BOF (start of input) test fixup.
    //
    if (fRB.fSetBuilder.sawBOF()) {
        bofFixup();
    }

    //
    // Build the DFA state transition tables.
    //
    buildStateTable();
    flagAcceptingStates();
    flagLookAheadStates();
    flagTaggedStates();

    //
    // Update the global table of rule status {tag} values.
    // The rule builder has a global vector of status values that are common
    // for all tables. Merge the ones from this table into the global set.
    //
    mergeRuleStatusVals();
}
//-----------------------------------------------------------------------------
//
//  calcNullable.    Impossible to explain succinctly.  See Aho, section 3.9
//
//                   Sets fNullable on n and, recursively, on every node below it.
//                   A null node is ignored.
//
//-----------------------------------------------------------------------------
void calcNullable(RBBINode n) {
    if (n == null) {
        return;
    }

    // Set references and end markers are non-empty leaf node types.
    boolean nonEmptyLeaf = n.fType == RBBINode.setRef || n.fType == RBBINode.endMark;
    if (nonEmptyLeaf) {
        n.fNullable = false;
        return;
    }

    // Look-ahead and tag marker nodes are leaves too, so no recursion on children.
    // They are nullable because they do not match any literal text from the input stream.
    boolean markerLeaf = n.fType == RBBINode.lookAhead || n.fType == RBBINode.tag;
    if (markerLeaf) {
        n.fNullable = true;
        return;
    }

    // Anything else is handled as a non-leaf: calculate nullable on its children first.
    // (Recursing into a missing child is a no-op.)
    RBBINode left = n.fLeftChild;
    RBBINode right = n.fRightChild;
    calcNullable(left);
    calcNullable(right);

    // Apply functions from table 3.40 in Aho.
    boolean nullable;
    if (n.fType == RBBINode.opOr) {
        nullable = left.fNullable || right.fNullable;
    } else if (n.fType == RBBINode.opCat) {
        nullable = left.fNullable && right.fNullable;
    } else {
        // '*' and '?' can match nothing; every remaining node type is not nullable.
        nullable = n.fType == RBBINode.opStar || n.fType == RBBINode.opQuestion;
    }
    n.fNullable = nullable;
}
//-----------------------------------------------------------------------------
//
//  calcFirstPos.    Impossible to explain succinctly.  See Aho, section 3.9
//
//                   Fills in fFirstPosSet on n and, recursively, on every node
//                   below it. Relies on fNullable having been calculated already.
//
//-----------------------------------------------------------------------------
void calcFirstPos(RBBINode n) {
    if (n == null) {
        return;
    }

    // Leaf positions: the first position of a leaf is the leaf itself.
    boolean isLeaf = n.fType == RBBINode.leafChar
            || n.fType == RBBINode.endMark
            || n.fType == RBBINode.lookAhead
            || n.fType == RBBINode.tag;
    if (isLeaf) {
        n.fFirstPosSet.add(n);
        return;
    }

    // The node is not a leaf.
    // Calculate firstPos on its children.
    RBBINode left = n.fLeftChild;
    RBBINode right = n.fRightChild;
    calcFirstPos(left);
    calcFirstPos(right);

    // Apply functions from table 3.40 in Aho.
    boolean isOr = n.fType == RBBINode.opOr;
    boolean isCat = n.fType == RBBINode.opCat;
    boolean isRepeat = n.fType == RBBINode.opStar
            || n.fType == RBBINode.opQuestion
            || n.fType == RBBINode.opPlus;
    if (!isOr && !isCat && !isRepeat) {
        // Other node types contribute nothing here.
        return;
    }

    // Every handled operator starts with whatever its left operand starts with.
    n.fFirstPosSet.addAll(left.fFirstPosSet);

    // An alternation always includes the right operand; a concatenation only
    // when the left operand can match the empty string.
    if (isOr || (isCat && left.fNullable)) {
        n.fFirstPosSet.addAll(right.fFirstPosSet);
    }
}
//-----------------------------------------------------------------------------
//
//  calcLastPos.    Impossible to explain succinctly.  See Aho, section 3.9
//
//                  Fills in fLastPosSet on n and, recursively, on every node
//                  below it. Relies on fNullable having been calculated already.
//
//-----------------------------------------------------------------------------
void calcLastPos(RBBINode n) {
    if (n == null) {
        return;
    }

    // Leaf positions: the last position of a leaf is the leaf itself.
    boolean isLeaf = n.fType == RBBINode.leafChar
            || n.fType == RBBINode.endMark
            || n.fType == RBBINode.lookAhead
            || n.fType == RBBINode.tag;
    if (isLeaf) {
        n.fLastPosSet.add(n);
        return;
    }

    // The node is not a leaf.
    // Calculate lastPos on its children.
    RBBINode left = n.fLeftChild;
    RBBINode right = n.fRightChild;
    calcLastPos(left);
    calcLastPos(right);

    // Apply functions from table 3.40 in Aho.
    boolean isRepeat = n.fType == RBBINode.opStar
            || n.fType == RBBINode.opQuestion
            || n.fType == RBBINode.opPlus;
    if (n.fType == RBBINode.opOr) {
        // Either alternative may end the match.
        n.fLastPosSet.addAll(left.fLastPosSet);
        n.fLastPosSet.addAll(right.fLastPosSet);
    } else if (n.fType == RBBINode.opCat) {
        // The right operand ends the match; the left one can too, but only
        // when the right operand can match the empty string.
        n.fLastPosSet.addAll(right.fLastPosSet);
        if (right.fNullable) {
            n.fLastPosSet.addAll(left.fLastPosSet);
        }
    } else if (isRepeat) {
        n.fLastPosSet.addAll(left.fLastPosSet);
    }
}
//-----------------------------------------------------------------------------
//
//  calcFollowPos.    Impossible to explain succinctly.  See Aho, section 3.9
//
//                    Adds to the fFollowPos sets of the positions below n.
//                    Relies on the firstPos and lastPos sets being complete.
//
//-----------------------------------------------------------------------------
void calcFollowPos(RBBINode n) {
    if (n == null) {
        return;
    }
    if (n.fType == RBBINode.leafChar || n.fType == RBBINode.endMark) {
        // Nothing to do for these leaves.
        return;
    }

    calcFollowPos(n.fLeftChild);
    calcFollowPos(n.fRightChild);

    // Aho rule #1: for a cat node, whatever can start the right operand
    // follows every position that can end the left operand.
    if (n.fType == RBBINode.opCat) {
        for (RBBINode lastOfLeft : n.fLeftChild.fLastPosSet) {
            lastOfLeft.fFollowPos.addAll(n.fRightChild.fFirstPosSet);
        }
    }

    // Aho rule #2: for a repeating node, whatever can start the node
    // follows every position that can end it.
    if (n.fType == RBBINode.opStar || n.fType == RBBINode.opPlus) {
        for (RBBINode lastOfNode : n.fLastPosSet) {
            lastOfNode.fFollowPos.addAll(n.fFirstPosSet);
        }
    }
}
//-----------------------------------------------------------------------------
//
//  addRuleRootNodes   Recursively walk a parse tree, adding all nodes flagged
//                     as roots of a rule to a destination list.
//
//      dest   receives the rule root nodes, in the order they are found
//             (left subtree before right subtree).
//      node   root of the (sub)tree to search; null is ignored.
//
//-----------------------------------------------------------------------------
void addRuleRootNodes(List<RBBINode> dest, RBBINode node) {
    if (node == null) {
        return;
    }
    if (node.fRuleRoot) {
        dest.add(node);
        // Note: rules cannot nest. If we found a rule start node,
        //       no child node can also be a start node.
        return;
    }
    addRuleRootNodes(dest, node.fLeftChild);
    addRuleRootNodes(dest, node.fRightChild);
}
//-----------------------------------------------------------------------------
//
//  calcChainedFollowPos.    Modify the previously calculated followPos sets
//                           to implement rule chaining.  NOT described by Aho.
//
//      tree   root of the parse tree whose followPos sets are to be extended.
//
//-----------------------------------------------------------------------------
void calcChainedFollowPos(RBBINode tree) {
    List<RBBINode> endMarkerNodes = new ArrayList<RBBINode>();
    List<RBBINode> leafNodes = new ArrayList<RBBINode>();

    // Get a list of all endmarker nodes.
    tree.findNodes(endMarkerNodes, RBBINode.endMark);

    // Get a list of all leaf nodes.
    tree.findNodes(leafNodes, RBBINode.leafChar);

    // Collect all leaf nodes that can start matches for rules
    // with inbound chaining enabled, which is the union of the
    // firstPosition sets from each of the rule root nodes.
    List<RBBINode> ruleRootNodes = new ArrayList<RBBINode>();
    addRuleRootNodes(ruleRootNodes, tree);

    Set<RBBINode> matchStartNodes = new HashSet<RBBINode>();
    for (RBBINode node : ruleRootNodes) {
        if (node.fChainIn) {
            matchStartNodes.addAll(node.fFirstPosSet);
        }
    }

    // Iterate over all leaf nodes.
    for (RBBINode tNode : leafNodes) {
        // Identify leaf nodes that correspond to overall rule match positions.
        // These include an endMarkerNode in their followPos sets.
        boolean canEndMatch = false;
        for (RBBINode endMarkerNode : endMarkerNodes) {
            if (tNode.fFollowPos.contains(endMarkerNode)) {
                canEndMatch = true;
                break;
            }
        }
        if (!canEndMatch) {
            // Node wasn't an end node. Try again with the next.
            continue;
        }

        // We've got a node that can end a match.
        RBBINode endNode = tNode;

        // Line Break Specific hack:  If this node's val corresponds to the $CM char class,
        //                            don't chain from it.
        // TODO:  Add rule syntax for this behavior, get specifics out of here and
        //        into the rule file.
        if (fRB.fLBCMNoChain) {
            int c = this.fRB.fSetBuilder.getFirstChar(endNode.fVal);
            if (c != -1) {
                // c == -1 occurs with sets containing only the {eof} marker string.
                int cLBProp = UCharacter.getIntPropertyValue(c, UProperty.LINE_BREAK);
                if (cLBProp == UCharacter.LineBreak.COMBINING_MARK) {
                    continue;
                }
            }
        }

        // Now iterate over the nodes that can start a match, looking for ones
        // with the same char class as our ending node.
        for (RBBINode startNode : matchStartNodes) {
            if (startNode.fType != RBBINode.leafChar) {
                continue;
            }
            if (endNode.fVal == startNode.fVal) {
                // The end val (character class) of one possible match is the
                // same as the start of another.

                // Add all nodes from the followPos of the start node to the
                // followPos set of the end node, which will have the effect of
                // letting matches transition from a match state at endNode
                // to the second char of a match starting with startNode.
                endNode.fFollowPos.addAll(startNode.fFollowPos);
            }
        }
    }
}
//-----------------------------------------------------------------------------
//
//  bofFixup.    Fixup for state tables that include {bof} beginning of input testing.
//               Do a swizzle similar to chaining, modifying the followPos set of
//               the bofNode to include the followPos nodes from other {bof} nodes
//               scattered through the tree.
//
//               This function has much in common with calcChainedFollowPos().
//
//-----------------------------------------------------------------------------
void bofFixup() {
    //
    //   The parse tree looks like this ...
    //         fTree root  --.        (cat)
    //                               /     \
    //                            (cat)   (#end node)
    //                           /     \
    //                     (bofNode)   rest
    //                                of tree
    //
    //    We will be adding things to the followPos set of the (bofNode).
    //
    RBBINode bofNode = fRB.fTreeRoots[fRootIx].fLeftChild.fLeftChild;
    Assert.assrt(bofNode.fType == RBBINode.leafChar);
    Assert.assrt(bofNode.fVal == 2);      // 2 is the value given to the fake {bof} leaf.

    // Get all nodes that can be the start of a match of the user-written rules
    //  (excluding the fake bofNode).
    //  We want the nodes that can start a match in the
    //     part labeled "rest of tree".
    //
    Set<RBBINode> matchStartNodes = fRB.fTreeRoots[fRootIx].fLeftChild.fRightChild.fFirstPosSet;
    for (RBBINode startNode : matchStartNodes) {
        if (startNode.fType != RBBINode.leafChar) {
            continue;
        }

        if (startNode.fVal == bofNode.fVal) {
            //  We found a leaf node corresponding to a {bof} that was
            //    explicitly written into a rule.
            //  Add everything from the followPos set of this node to the
            //    followPos set of the fake bofNode at the start of the tree.
            //
            bofNode.fFollowPos.addAll(startNode.fFollowPos);
        }
    }
}
//-----------------------------------------------------------------------------
//
// buildStateTable() Determine the set of runtime DFA states and the
// transition tables for these states, by the algorithm
// of fig. 3.44 in Aho.
//
// Most of the comments are quotes of Aho's pseudo-code.
//
//-----------------------------------------------------------------------------
void buildStateTable() {
//
// Add a dummy state 0 - the stop state. Not from Aho.
int lastInputSymbol = fRB.fSetBuilder.getNumCharCategories() - 1;
RBBIStateDescriptor failState = new RBBIStateDescriptor(lastInputSymbol);
fDStates.add(failState);
// initially, the only unmarked state in Dstates is firstpos(root),
// where root is the root of the syntax tree for (r)#;
RBBIStateDescriptor initialState = new RBBIStateDescriptor(lastInputSymbol);
initialState.fPositions.addAll(fRB.fTreeRoots[fRootIx].fFirstPosSet);
fDStates.add(initialState);
// while there is an unmarked state T in Dstates do begin
for (;;) {
RBBIStateDescriptor T = null;
int tx;
for (tx=1; tx U = null;
for (RBBINode p : T.fPositions) {
if ((p.fType == RBBINode.leafChar) && (p.fVal == a)) {
if (U == null) {
U = new HashSet();
}
U.addAll(p.fFollowPos);
}
}
// if U is not empty and not in DStates then
int ux = 0;
boolean UinDstates = false;
if (U != null) {
Assert.assrt(U.size() > 0);
int ix;
for (ix=0; ix endMarkerNodes = new ArrayList();
RBBINode endMarker;
int i;
int n;
fRB.fTreeRoots[fRootIx].findNodes(endMarkerNodes, RBBINode.endMark);
for (i=0; i= 0) {
if (sd.fPositions.contains(endMarker)) {
// Any non-zero value for fAccepting means this is an accepting node.
// The value is what will be returned to the user as the break status.
// If no other value was specified, force it to -1.
if (sd.fAccepting==0) {
// State hasn't been marked as accepting yet. Do it now.
sd.fAccepting = endMarker.fVal;
if (sd.fAccepting == 0) {
sd.fAccepting = -1;
}
}
if (sd.fAccepting==-1 && endMarker.fVal != 0) {
// Both lookahead and non-lookahead accepting for this state.
// Favor the look-ahead. Expedient for line break.
// TODO: need a more elegant resolution for conflicting rules.
sd.fAccepting = endMarker.fVal;
}
// implicit else:
// if sd.fAccepting already had a value other than 0 or -1, leave it be.
// If the end marker node is from a look-ahead rule, set
// the fLookAhead field for this state also.
if (endMarker.fLookAheadEnd) {
// TODO: don't change value if already set?
// TODO: allow for more than one active look-ahead rule in engine.
// Make value here an index to a side array in engine?
sd.fLookAhead = sd.fAccepting;
}
}
}
}
}
//-----------------------------------------------------------------------------
//
// flagLookAheadStates Very similar to flagAcceptingStates, above.
//
//-----------------------------------------------------------------------------
void flagLookAheadStates() {
List lookAheadNodes = new ArrayList();
RBBINode lookAheadNode;
int i;
int n;
fRB.fTreeRoots[fRootIx].findNodes(lookAheadNodes, RBBINode.lookAhead);
for (i=0; i tagNodes = new ArrayList();
RBBINode tagNode;
int i;
int n;
fRB.fTreeRoots[fRootIx].findNodes(tagNodes, RBBINode.tag);
for (i=0; i s0 = new TreeSet();
Integer izero = Integer.valueOf(0);
fRB.fStatusSets.put(s0, izero);
SortedSet s1 = new TreeSet();
s1.add(izero);
fRB.fStatusSets.put(s0, izero);
}
// For each state, check whether the state's status tag values are
// already entered into the status values array, and add them if not.
for (n=0; n statusVals = sd.fTagVals;
Integer arrayIndexI = fRB.fStatusSets.get(statusVals);
if (arrayIndexI == null) {
// This is the first encounter of this set of status values.
// Add them to the statusSets map, This map associates
// the set of status values with an index in the runtime status
// values array.
arrayIndexI = Integer.valueOf(fRB.fRuleStatusVals.size());
fRB.fStatusSets.put(statusVals, arrayIndexI);
// Add the new set of status values to the vector of values that
// will eventually become the array used by the runtime engine.
fRB.fRuleStatusVals.add(Integer.valueOf(statusVals.size()));
fRB.fRuleStatusVals.addAll(statusVals);
}
// Save the runtime array index back into the state descriptor.
sd.fTagsIdx = arrayIndexI.intValue();
}
}
//-----------------------------------------------------------------------------
//
//  printPosSets   Debug function.  Dump Nullable, firstpos, lastpos and followpos
//                 for each node in the tree, visiting a node before its left
//                 and right subtrees.  Output goes to System.out.
//
//-----------------------------------------------------------------------------
void printPosSets(RBBINode n) {
    if (n == null) {
        return;
    }

    // This node: a description of the node, then its four computed attributes.
    RBBINode.printNode(n);
    System.out.print(" Nullable: ");
    System.out.print(n.fNullable);

    System.out.print(" firstpos: ");
    printSet(n.fFirstPosSet);

    System.out.print(" lastpos: ");
    printSet(n.fLastPosSet);

    System.out.print(" followpos: ");
    printSet(n.fFollowPos);

    // Then the children, left before right.
    printPosSets(n.fLeftChild);
    printPosSets(n.fRightChild);
}
/**
* Find duplicate (redundant) character classes. Begin looking with categories.first.
* Duplicates, if found are returned in the categories parameter.
* This is an iterator-like function, used to identify character classes
* (state table columns) that can be eliminated.
* @param categories in/out parameter, specifies where to start looking for duplicates,
* and returns the first pair of duplicates found, if any.
* @return true if duplicate char classes were found, false otherwise.
* @internal
*/
boolean findDuplCharClassFrom(RBBIRuleBuilder.IntPair categories) {
int numStates = fDStates.size();
int numCols = fRB.fSetBuilder.getNumCharCategories();
int table_base = 0;
int table_dupl = 0;
for (; categories.first < numCols-1; ++categories.first) {
for (categories.second=categories.first+1; categories.second < numCols; ++categories.second) {
for (int state=0; state duplState) {
newVal = existingVal - 1;
}
sd.fDtran[col] = newVal;
}
if (sd.fAccepting == duplState) {
sd.fAccepting = keepState;
} else if (sd.fAccepting > duplState) {
sd.fAccepting--;
}
if (sd.fLookAhead == duplState) {
sd.fLookAhead = keepState;
} else if (sd.fLookAhead > duplState) {
sd.fLookAhead--;
}
}
}
/**
* Remove a duplicate state from the safe table.
* @param duplStates The duplicate pair of states. The first is kept, the second is removed.
* All references to the second in the state table are retargeted
* to the first.
* @internal
*/
void removeSafeState(IntPair duplStates) {
final int keepState = duplStates.first;
final int duplState = duplStates.second;
assert(keepState < duplState);
assert(duplState < fSafeTable.size());
fSafeTable.remove(duplState);
int numStates = fSafeTable.size();
for (int state=0; state duplState) {
newVal = existingVal - 1;
}
row[col] = (short)newVal;
}
}
}
/**
 * Repeatedly look for a pair of duplicate states (table rows) and remove the
 * redundant one, until no more duplicates are reported.
 * <p>
 * The same IntPair is reused for every search, so each search resumes from
 * wherever the previous one left the pair. The search is seeded with (3, 0).
 * NOTE(review): the reason for starting at state 3 is not visible in this
 * method; confirm against findDuplicateState().
 *
 * @return the number of states removed.
 * @internal
 */
int removeDuplicateStates() {
    int removedCount = 0;
    for (IntPair duplicates = new IntPair(3, 0); findDuplicateState(duplicates); ++removedCount) {
        removeState(duplicates);
    }
    return removedCount;
}
/**
 * Calculate the size in bytes of the serialized form of this state transition table,
 * which is identical to the ICU4C runtime form.
 * Refer to common/rbbidata.h from ICU4C for the declarations of the structures
 * being matched by this calculation.
 *
 * @return 0 when there is no parse tree for this table; otherwise the size of the
 *         table header plus all rows, rounded up to a multiple of 8 bytes.
 */
int getTableSize() {
    if (fRB.fTreeRoots[fRootIx] == null) {
        return 0;
    }

    // The header of 4 ints, with no rows to the table.
    final int headerBytes = 16;

    final int rowCount = fDStates.size();
    final int columnCount = fRB.fSetBuilder.getNumCharCategories();

    // Each row is 8 bytes of row header followed by 2 bytes per character category.
    final int bytesPerRow = 8 + 2 * columnCount;

    final int unpaddedSize = headerBytes + rowCount * bytesPerRow;

    // Round up to a multiple of 8 bytes.
    return (unpaddedSize + 7) & ~7;
}
/**
* Create a RBBIDataWrapper.RBBIStateTable for a newly compiled table.
* RBBIDataWrapper.RBBIStateTable is similar to struct RBBIStateTable in ICU4C,
* in common/rbbidata.h
*/
RBBIDataWrapper.RBBIStateTable exportTable() {
int state;
int col;
RBBIDataWrapper.RBBIStateTable table = new RBBIDataWrapper.RBBIStateTable();
if (fRB.fTreeRoots[fRootIx] == null) {
return table;
}
Assert.assrt(fRB.fSetBuilder.getNumCharCategories() < 0x7fff &&
fDStates.size() < 0x7fff);
table.fNumStates = fDStates.size();
// Size of table size in shorts.
// the "4" is the size of struct RBBIStateTableRow, the row header part only.
int rowLen = 4 + fRB.fSetBuilder.getNumCharCategories(); // Row Length in shorts.
int tableSize = (getTableSize() - 16) / 2; // fTable length in shorts.
table.fTable = new short[tableSize];
table.fRowLen = rowLen * 2; // Row length in bytes.
if (fRB.fLookAheadHardBreak) {
table.fFlags |= RBBIDataWrapper.RBBI_LOOKAHEAD_HARD_BREAK;
}
if (fRB.fSetBuilder.sawBOF()) {
table.fFlags |= RBBIDataWrapper.RBBI_BOF_REQUIRED;
}
int numCharCategories = fRB.fSetBuilder.getNumCharCategories();
for (state=0; state
// Row 0 is the stop state.
// Row 1 is the start state.
// Row 2 and beyond are other states, initially one per char class, but
// after initial construction, many of the states will be combined, compacting the table.)
// The String holds the nextState data only. The four leading fields of a row, fAccepting,
// fLookAhead, etc. are not needed for the safe table, and are omitted at this stage of building.
assert(fSafeTable == null);
fSafeTable = new ArrayList();
for (int row=0; row s) {
for (RBBINode n : s) {
RBBINode.printInt(n.fSerialNum, 8);
}
System.out.println();
}
//-----------------------------------------------------------------------------
//
// printStates Debug Function. Dump the fully constructed state transition table.
//
//-----------------------------------------------------------------------------
void printStates() {
int c; // input "character"
int n; // state number
System.out.print("state | i n p u t s y m b o l s \n");
System.out.print(" | Acc LA Tag");
for (c=0; c tbl = fRB.fRuleStatusVals;
System.out.print("index | tags \n");
System.out.print("-------------------\n");
while (nextRecord < tbl.size()) {
thisRecord = nextRecord;
nextRecord = thisRecord + tbl.get(thisRecord).intValue() + 1;
RBBINode.printInt(thisRecord, 7);
for (i=thisRecord+1; i