com.ibm.icu.text.RBBITableBuilder Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of icu4j Show documentation
Show all versions of icu4j Show documentation
International Components for Unicode for Java (ICU4J) is a mature, widely used Java library
providing Unicode and Globalization support
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
* Copyright (c) 2002-2016, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*/
package com.ibm.icu.text;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import com.ibm.icu.impl.Assert;
import com.ibm.icu.impl.RBBIDataWrapper;
import com.ibm.icu.text.RBBIRuleBuilder.IntPair;
/**
* This class is part of the RBBI rule compiler.
* It builds the state transition table used by the RBBI runtime
* from the expression syntax tree generated by the rule scanner.
*
* This class is part of the RBBI implementation only.
* There is no user-visible public API here.
*/
class RBBITableBuilder {
//
// RBBIStateDescriptor - The DFA is initially constructed as a set of these descriptors,
// one for each state.
static class RBBIStateDescriptor {
    boolean fMarked;
    int fAccepting;
    int fLookAhead;

    /** Rule status {tag} values attached to this state, kept in sorted order. */
    SortedSet<Integer> fTagVals;

    /** Index of this state's tag values in the rule builder's rule status value array. */
    int fTagsIdx;

    /**
     * Set of parse tree positions associated with this state.
     * Unordered (it's a set). The elements are RBBINode instances.
     */
    Set fPositions;

    /**
     * Transitions out of this state, indexed by input symbol (character category).
     * Each entry is the int index of the destination state in RBBITableBuilder.fDStates.
     */
    int[] fDtran;

    /**
     * Creates a descriptor with empty tag and position sets and a zero-filled
     * transition row.
     *
     * @param maxInputSymbol the largest input symbol value that will be used to index fDtran
     */
    RBBIStateDescriptor(int maxInputSymbol) {
        fTagVals = new TreeSet<>();
        fPositions = new HashSet<>();
        // fDtran needs to be pre-sized. It is indexed by input symbols, and will
        // hold the next state number for each symbol.
        fDtran = new int[maxInputSymbol + 1];
    }
}
/** The rule builder that owns the parse trees, set builder and status tables used by this class. */
private RBBIRuleBuilder fRB;

/** The array index into RBBIRuleBuilder.fTreeRoots for the parse tree to operate on. */
private int fRootIx;

/** D states (Aho's terminology). Index is state number. Elements are RBBIStateDescriptor. */
private List fDStates;

/** Synthesized safe table, a List of row arrays. Each row holds next-state values as shorts. */
private List<short[]> fSafeTable;

/** Largest number of states for which the exported table still uses 8 bit rows. */
private static final int MAX_STATE_FOR_8BITS_TABLE = 255;

/** Map from rule number (fVal in look ahead nodes) to sequential lookahead index. */
int[] fLookAheadRuleMap;

/** Counter used when assigning lookahead rule numbers.
 * Contains the last look-ahead number already in use.
 * The first look-ahead number is 2; Number 1 (ACCEPTING_UNCONDITIONAL) is reserved
 * for non-lookahead accepting states. See the declarations of RBBIStateTableRowT. */
int fLASlotsInUse = RBBIDataWrapper.ACCEPTING_UNCONDITIONAL;
//-----------------------------------------------------------------------------
//
// Constructor for RBBITableBuilder.
//
// rootNode is an index into the array of root nodes that is held by
// the overall RBBIRuleBuilder.
//-----------------------------------------------------------------------------
RBBITableBuilder(RBBIRuleBuilder rb, int rootNodeIx) {
    // Remember which builder, and which of its parse tree roots, this table is for.
    this.fRB = rb;
    this.fRootIx = rootNodeIx;
    // The state list starts out empty; states are added while the table is built.
    this.fDStates = new ArrayList<>();
}
//-----------------------------------------------------------------------------
//
// RBBITableBuilder::buildForwardTable - This is the main function for building
// the DFA state transition table from the RBBI rules parse tree.
//
//-----------------------------------------------------------------------------
void buildForwardTable() {
    // With no rules there is nothing to build. This situation can easily arise
    // for the reverse rules.
    RBBINode root = fRB.fTreeRoots[fRootIx];
    if (root == null) {
        return;
    }

    // Replace every $variable reference with a copy of the parse tree of the
    // expression it stands for.
    root = root.flattenVariables();
    fRB.fTreeRoots[fRootIx] = root;
    if (debugFlagSet("ftree")) {
        System.out.println("Parse tree after flattening variable references.");
        root.printTree(true);
    }

    // If the rules referenced {bof}, put a {bof} leaf in front of the whole tree,
    // so that every match must start with the fake {bof} character.
    if (fRB.fSetBuilder.sawBOF()) {
        RBBINode bofTop = new RBBINode(RBBINode.opCat);
        RBBINode bofLeaf = new RBBINode(RBBINode.leafChar);
        bofLeaf.fVal = 2;      // Reserved value for {bof}.
        bofLeaf.fParent = bofTop;
        bofTop.fLeftChild = bofLeaf;
        bofTop.fRightChild = root;
        root = bofTop;
        fRB.fTreeRoots[fRootIx] = root;
    }

    // Append the unique right-end marker: a new cat node whose left child is the
    // tree built so far and whose right child is the end marker.
    RBBINode catNode = new RBBINode(RBBINode.opCat);
    RBBINode endMarkerNode = new RBBINode(RBBINode.endMark);
    catNode.fLeftChild = root;
    root.fParent = catNode;
    catNode.fRightChild = endMarkerNode;
    endMarkerNode.fParent = catNode;
    root = catNode;
    fRB.fTreeRoots[fRootIx] = root;

    // Replace all references to UnicodeSets with the tree for the equivalent expression.
    root.flattenSets();
    if (debugFlagSet("stree")) {
        System.out.println("Parse tree after flattening Unicode Set references.");
        root.printTree(true);
    }

    // Compute nullable, firstpos, lastpos and followpos for the nodes of the tree.
    // The algorithm is the one described in Aho; reading the code alone is not
    // enough to follow it.
    calcNullable(root);
    calcFirstPos(root);
    calcLastPos(root);
    calcFollowPos(root);
    if (debugFlagSet("pos")) {
        System.out.print("\n");
        printPosSets(root);
    }

    // For "chained" rules, modify the followPos sets.
    if (fRB.fChainRules) {
        calcChainedFollowPos(root, endMarkerNode);
    }

    // BOF (start of input) test fixup.
    if (fRB.fSetBuilder.sawBOF()) {
        bofFixup();
    }

    // Build the DFA state transition tables.
    buildStateTable();
    mapLookAheadRules();
    flagAcceptingStates();
    flagLookAheadStates();
    flagTaggedStates();

    // The rule builder keeps one global vector of rule status {tag} values shared
    // by all tables; merge the values from this table into it.
    mergeRuleStatusVals();
}

/** Returns true when the builder's debug environment string is set and contains the given flag. */
private boolean debugFlagSet(String flag) {
    return fRB.fDebugEnv != null && fRB.fDebugEnv.indexOf(flag) >= 0;
}
//-----------------------------------------------------------------------------
//
// calcNullable. Impossible to explain succinctly. See Aho, section 3.9
//
//-----------------------------------------------------------------------------
void calcNullable(RBBINode n) {
    if (n == null) {
        return;
    }

    // Leaves that always consume input: never nullable.
    if (n.fType == RBBINode.setRef || n.fType == RBBINode.endMark) {
        n.fNullable = false;
        return;
    }

    // Look-ahead and tag markers are leaves that match no literal text from the
    // input stream, so they are nullable. No recursion on children.
    if (n.fType == RBBINode.lookAhead || n.fType == RBBINode.tag) {
        n.fNullable = true;
        return;
    }

    // Everything else: compute the children first ...
    calcNullable(n.fLeftChild);
    calcNullable(n.fRightChild);

    // ... then apply the functions from table 3.40 in Aho.
    final boolean result;
    if (n.fType == RBBINode.opOr) {
        result = n.fLeftChild.fNullable || n.fRightChild.fNullable;
    } else if (n.fType == RBBINode.opCat) {
        result = n.fLeftChild.fNullable && n.fRightChild.fNullable;
    } else {
        // '*' and '?' can match nothing; every remaining node type cannot.
        result = (n.fType == RBBINode.opStar || n.fType == RBBINode.opQuestion);
    }
    n.fNullable = result;
}
//-----------------------------------------------------------------------------
//
// calcFirstPos. Impossible to explain succinctly. See Aho, section 3.9
//
//-----------------------------------------------------------------------------
void calcFirstPos(RBBINode n) {
    if (n == null) {
        return;
    }

    // Leaf positions: the first position of a leaf is the leaf itself.
    final boolean isLeaf = n.fType == RBBINode.leafChar
            || n.fType == RBBINode.endMark
            || n.fType == RBBINode.lookAhead
            || n.fType == RBBINode.tag;
    if (isLeaf) {
        n.fFirstPosSet.add(n);
        return;
    }

    // Interior node: the children must be computed before this node.
    calcFirstPos(n.fLeftChild);
    calcFirstPos(n.fRightChild);

    // Functions from table 3.40 in Aho. Every operator handled here starts with
    // firstpos(left); only '|' and (for a nullable left side) concatenation also
    // take firstpos(right).
    final boolean isOr = n.fType == RBBINode.opOr;
    final boolean isCat = !isOr && n.fType == RBBINode.opCat;
    final boolean isRepeat = !isOr && !isCat
            && (n.fType == RBBINode.opStar
                || n.fType == RBBINode.opQuestion
                || n.fType == RBBINode.opPlus);
    if (!isOr && !isCat && !isRepeat) {
        return;
    }

    n.fFirstPosSet.addAll(n.fLeftChild.fFirstPosSet);
    if (isOr || (isCat && n.fLeftChild.fNullable)) {
        n.fFirstPosSet.addAll(n.fRightChild.fFirstPosSet);
    }
}
//-----------------------------------------------------------------------------
//
// calcLastPos. Impossible to explain succinctly. See Aho, section 3.9
//
//-----------------------------------------------------------------------------
void calcLastPos(RBBINode n) {
    if (n == null) {
        return;
    }

    // Leaf positions: the last position of a leaf is the leaf itself.
    final boolean isLeaf = n.fType == RBBINode.leafChar
            || n.fType == RBBINode.endMark
            || n.fType == RBBINode.lookAhead
            || n.fType == RBBINode.tag;
    if (isLeaf) {
        n.fLastPosSet.add(n);
        return;
    }

    // Interior node: the children must be computed before this node.
    calcLastPos(n.fLeftChild);
    calcLastPos(n.fRightChild);

    // Functions from table 3.40 in Aho.
    if (n.fType == RBBINode.opOr) {
        // Alternation: either side may supply the last position.
        n.fLastPosSet.addAll(n.fLeftChild.fLastPosSet);
        n.fLastPosSet.addAll(n.fRightChild.fLastPosSet);
        return;
    }
    if (n.fType == RBBINode.opCat) {
        // Concatenation: lastpos(right), plus lastpos(left) when the right side
        // can match nothing.
        n.fLastPosSet.addAll(n.fRightChild.fLastPosSet);
        if (n.fRightChild.fNullable) {
            n.fLastPosSet.addAll(n.fLeftChild.fLastPosSet);
        }
        return;
    }
    if (n.fType == RBBINode.opStar
            || n.fType == RBBINode.opQuestion
            || n.fType == RBBINode.opPlus) {
        // Unary operators: same as the single (left) operand.
        n.fLastPosSet.addAll(n.fLeftChild.fLastPosSet);
    }
}
//-----------------------------------------------------------------------------
//
// calcFollowPos. Impossible to explain succinctly. See Aho, section 3.9
//
//-----------------------------------------------------------------------------
void calcFollowPos(RBBINode n) {
    if (n == null) {
        return;
    }
    if (n.fType == RBBINode.leafChar || n.fType == RBBINode.endMark) {
        // These leaves contribute nothing on their own.
        return;
    }

    final RBBINode left = n.fLeftChild;
    final RBBINode right = n.fRightChild;
    calcFollowPos(left);
    calcFollowPos(right);

    // Aho rule #1: for a cat node, every position in lastpos(left) is followed
    // by every position in firstpos(right).
    if (n.fType == RBBINode.opCat) {
        for (RBBINode lastOfLeft : left.fLastPosSet) {
            lastOfLeft.fFollowPos.addAll(right.fFirstPosSet);
        }
    }

    // Aho rule #2: for a repeating node, every position in lastpos(n) is
    // followed by every position in firstpos(n).
    final boolean repeats = n.fType == RBBINode.opStar || n.fType == RBBINode.opPlus;
    if (repeats) {
        for (RBBINode lastOfNode : n.fLastPosSet) {
            lastOfNode.fFollowPos.addAll(n.fFirstPosSet);
        }
    }
}
//-----------------------------------------------------------------------------
//
// addRuleRootNodes Recursively walk a parse tree, adding all nodes flagged
// as roots of a rule to a destination vector.
//
//-----------------------------------------------------------------------------
/**
 * Recursively walks a parse tree, appending every node flagged as the root of a
 * rule to the destination list.
 *
 * @param dest receives the rule root nodes, in the order they are encountered
 *             (left subtree before right subtree)
 * @param node root of the (sub)tree to search; null is ignored
 */
void addRuleRootNodes(List<RBBINode> dest, RBBINode node) {
    if (node == null) {
        return;
    }
    if (node.fRuleRoot) {
        dest.add(node);
        // Note: rules cannot nest. If we found a rule start node,
        //       no child node can also be a start node.
        return;
    }
    addRuleRootNodes(dest, node.fLeftChild);
    addRuleRootNodes(dest, node.fRightChild);
}
//-----------------------------------------------------------------------------
//
// calcChainedFollowPos. Modify the previously calculated followPos sets
// to implement rule chaining. NOT described by Aho
//
//-----------------------------------------------------------------------------
/**
 * Modifies the previously calculated followPos sets to implement rule chaining.
 * This step is not part of the algorithm described by Aho.
 *
 * @param tree        root of the parse tree whose followPos sets are updated
 * @param endMarkNode the end marker node at the end of the complete tree
 */
void calcChainedFollowPos(RBBINode tree, RBBINode endMarkNode) {
    // Get a list of all leaf nodes.
    List<RBBINode> leafNodes = new ArrayList<>();
    tree.findNodes(leafNodes, RBBINode.leafChar);

    // Collect all leaf nodes that can start matches for rules
    // with inbound chaining enabled, which is the union of the
    // firstPosition sets from each of the rule root nodes.
    List<RBBINode> ruleRootNodes = new ArrayList<>();
    addRuleRootNodes(ruleRootNodes, tree);

    Set<RBBINode> matchStartNodes = new HashSet<>();
    for (RBBINode node : ruleRootNodes) {
        if (node.fChainIn) {
            matchStartNodes.addAll(node.fFirstPosSet);
        }
    }

    // Iterate over all leaf nodes.
    for (RBBINode endNode : leafNodes) {
        // Identify leaf nodes that correspond to overall rule match positions.
        // These include the endMarkNode in their followPos sets.
        //
        // Note: do not consider other end marker nodes, those that are added to
        //       look-ahead rules. These can't chain; a match immediately stops
        //       further matching. This leaves exactly one end marker node, the one
        //       at the end of the complete tree.
        if (!endNode.fFollowPos.contains(endMarkNode)) {
            continue;
        }

        // We've got a node that can end a match.
        // Now iterate over the nodes that can start a match, looking for ones
        // with the same char class as our ending node.
        for (RBBINode startNode : matchStartNodes) {
            if (startNode.fType != RBBINode.leafChar) {
                continue;
            }
            if (endNode.fVal == startNode.fVal) {
                // The end val (character class) of one possible match is the
                // same as the start of another.
                // Add all nodes from the followPos of the start node to the
                // followPos set of the end node, which will have the effect of
                // letting matches transition from a match state at endNode
                // to the second char of a match starting with startNode.
                endNode.fFollowPos.addAll(startNode.fFollowPos);
            }
        }
    }
}
//-----------------------------------------------------------------------------
//
// bofFixup. Fixup for state tables that include {bof} beginning of input testing.
// Do a swizzle similar to chaining, modifying the followPos set of
// the bofNode to include the followPos nodes from other {bof} nodes
// scattered through the tree.
//
// This function has much in common with calcChainedFollowPos().
//
//-----------------------------------------------------------------------------
/**
 * Fixup for state tables that include {bof} (beginning of input) testing.
 * Adds, to the followPos set of the fake {bof} leaf at the start of the tree,
 * the followPos sets of every {bof} leaf that can start a match of the
 * user-written rules.
 */
void bofFixup() {
    //
    //   The parse tree looks like this ...
    //         fTree root  --.       <cat>
    //                               /     \
    //                            <cat>   <#end node>
    //                           /     \
    //                     <bofNode>   rest
    //                                   of tree
    //
    //    We will be adding things to the followPos set of the <bofNode>
    //
    RBBINode bofCat = fRB.fTreeRoots[fRootIx].fLeftChild;
    RBBINode bofNode = bofCat.fLeftChild;
    Assert.assrt(bofNode.fType == RBBINode.leafChar);
    Assert.assrt(bofNode.fVal == 2);

    // Get all nodes that can be the start of a match of the user-written rules
    // (excluding the fake bofNode).
    // We want the nodes that can start a match in the
    // part labeled "rest of tree".
    Set<RBBINode> matchStartNodes = bofCat.fRightChild.fFirstPosSet;
    for (RBBINode startNode : matchStartNodes) {
        if (startNode.fType != RBBINode.leafChar) {
            continue;
        }
        if (startNode.fVal == bofNode.fVal) {
            // We found a leaf node corresponding to a {bof} that was
            // explicitly written into a rule.
            // Add everything from the followPos set of this node to the
            // followPos set of the fake bofNode at the start of the tree.
            bofNode.fFollowPos.addAll(startNode.fFollowPos);
        }
    }
}
//-----------------------------------------------------------------------------
//
// buildStateTable() Determine the set of runtime DFA states and the
// transition tables for these states, by the algorithm
// of fig. 3.44 in Aho.
//
// Most of the comments are quotes of Aho's pseudo-code.
//
//-----------------------------------------------------------------------------
void buildStateTable() {
//
// Add a dummy state 0 - the stop state. Not from Aho.
int lastInputSymbol = fRB.fSetBuilder.getNumCharCategories() - 1;
RBBIStateDescriptor failState = new RBBIStateDescriptor(lastInputSymbol);
fDStates.add(failState);
// initially, the only unmarked state in Dstates is firstpos(root),
// where root is the root of the syntax tree for (r)#;
RBBIStateDescriptor initialState = new RBBIStateDescriptor(lastInputSymbol);
initialState.fPositions.addAll(fRB.fTreeRoots[fRootIx].fFirstPosSet);
fDStates.add(initialState);
// while there is an unmarked state T in Dstates do begin
for (;;) {
RBBIStateDescriptor T = null;
int tx;
for (tx=1; tx U = null;
for (RBBINode p : T.fPositions) {
if ((p.fType == RBBINode.leafChar) && (p.fVal == a)) {
if (U == null) {
U = new HashSet<>();
}
U.addAll(p.fFollowPos);
}
}
// if U is not empty and not in DStates then
int ux = 0;
boolean UinDstates = false;
if (U != null) {
Assert.assrt(U.size() > 0);
int ix;
for (ix=0; ix 0);
int laSlot = fLookAheadRuleMap[ruleNum];
if (laSlot != 0) {
if (laSlotForState == 0) {
laSlotForState = laSlot;
} else {
// TODO: figure out if this can fail, change to setting an error code if so.
assert(laSlot == laSlotForState);
}
}
}
if (!sawLookAheadNode) {
continue;
}
if (laSlotForState == 0) {
laSlotForState = ++fLASlotsInUse;
}
// For each look ahead node covered by this state,
// set the mapping from the node's rule number to the look ahead slot.
// There can be multiple nodes/rule numbers going to the same la slot.
for (RBBINode node: sd.fPositions) {
if (node.fType != RBBINode.lookAhead) {
continue;
}
int ruleNum = node.fVal; // Set when rule was originally parsed.
int existingVal = fLookAheadRuleMap[ruleNum];
assert(existingVal == 0 || existingVal == laSlotForState);
fLookAheadRuleMap[ruleNum] = laSlotForState;
}
}
}
//-----------------------------------------------------------------------------
//
// flagAcceptingStates Identify accepting states.
// First get a list of all of the end marker nodes.
// Then, for each state s,
// if s contains one of the end marker nodes in its list of tree positions then
// s is an accepting state.
//
//-----------------------------------------------------------------------------
void flagAcceptingStates() {
List endMarkerNodes = new ArrayList<>();
RBBINode endMarker;
int i;
int n;
fRB.fTreeRoots[fRootIx].findNodes(endMarkerNodes, RBBINode.endMark);
for (i=0; i lookAheadNodes = new ArrayList<>();
RBBINode lookAheadNode;
int i;
int n;
fRB.fTreeRoots[fRootIx].findNodes(lookAheadNodes, RBBINode.lookAhead);
for (i=0; i tagNodes = new ArrayList<>();
RBBINode tagNode;
int i;
int n;
fRB.fTreeRoots[fRootIx].findNodes(tagNodes, RBBINode.tag);
for (i=0; i s0 = new TreeSet<>(); // mapping for rules with no explicit tagging
fRB.fStatusSets.put(s0, Integer.valueOf(0)); // (key is an empty set).
SortedSet s1 = new TreeSet<>(); // mapping for rules with explicit tagging of {0}
s1.add(Integer.valueOf(0));
fRB.fStatusSets.put(s1, Integer.valueOf(0));
}
// For each state, check whether the state's status tag values are
// already entered into the status values array, and add them if not.
for (n=0; n statusVals = sd.fTagVals;
Integer arrayIndexI = fRB.fStatusSets.get(statusVals);
if (arrayIndexI == null) {
// This is the first encounter of this set of status values.
// Add them to the statusSets map, This map associates
// the set of status values with an index in the runtime status
// values array.
arrayIndexI = Integer.valueOf(fRB.fRuleStatusVals.size());
fRB.fStatusSets.put(statusVals, arrayIndexI);
// Add the new set of status values to the vector of values that
// will eventually become the array used by the runtime engine.
fRB.fRuleStatusVals.add(Integer.valueOf(statusVals.size()));
fRB.fRuleStatusVals.addAll(statusVals);
}
// Save the runtime array index back into the state descriptor.
sd.fTagsIdx = arrayIndexI.intValue();
}
}
//-----------------------------------------------------------------------------
//
// printPosSets Debug function. Dump Nullable, firstpos, lastpos and followpos
// for each node in the tree.
//
//-----------------------------------------------------------------------------
void printPosSets(RBBINode n) {
    if (n != null) {
        // This node first: its description, then nullable and the three position sets.
        RBBINode.printNode(n);
        System.out.print(" Nullable: " + n.fNullable + " firstpos: ");
        printSet(n.fFirstPosSet);

        System.out.print(" lastpos: ");
        printSet(n.fLastPosSet);

        System.out.print(" followpos: ");
        printSet(n.fFollowPos);

        // Then the rest of the tree, left subtree before right subtree.
        printPosSets(n.fLeftChild);
        printPosSets(n.fRightChild);
    }
}
/**
* Find duplicate (redundant) character classes. Begin looking with categories.first.
* Duplicates, if found are returned in the categories parameter.
* This is an iterator-like function, used to identify character classes
* (state table columns) that can be eliminated.
* @param categories in/out parameter, specifies where to start looking for duplicates,
* and returns the first pair of duplicates found, if any.
* @return true if duplicate char classes were found, false otherwise.
* @internal
*/
boolean findDuplCharClassFrom(RBBIRuleBuilder.IntPair categories) {
int numStates = fDStates.size();
int numCols = fRB.fSetBuilder.getNumCharCategories();
int table_base = 0;
int table_dupl = 0;
for (; categories.first < numCols-1; ++categories.first) {
// Note: dictionary & non-dictionary columns cannot be merged.
// The limitSecond value prevents considering mixed pairs.
// Dictionary categories are >= DictCategoriesStart.
// Non dict categories are < DictCategoriesStart.
int limitSecond = categories.first < fRB.fSetBuilder.getDictCategoriesStart() ?
fRB.fSetBuilder.getDictCategoriesStart() : numCols;
for (categories.second=categories.first+1; categories.second < limitSecond; ++categories.second) {
for (int state=0; state duplState) {
newVal = existingVal - 1;
}
sd.fDtran[col] = newVal;
}
}
}
/**
* Remove a duplicate state from the safe table.
* @param duplStates The duplicate pair of states. The first is kept, the second is removed.
* All references to the second in the state table are retargeted
* to the first.
* @internal
*/
void removeSafeState(IntPair duplStates) {
final int keepState = duplStates.first;
final int duplState = duplStates.second;
assert(keepState < duplState);
assert(duplState < fSafeTable.size());
fSafeTable.remove(duplState);
int numStates = fSafeTable.size();
for (int state=0; state duplState) {
newVal = existingVal - 1;
}
row[col] = (short)newVal;
}
}
}
/**
* Check for, and remove duplicate states (table rows).
* @return the number of states removed.
* @internal
*/
int removeDuplicateStates() {
    // The same pair object is reused as the search cursor: each search resumes
    // from where the previous one reported its duplicate.
    final IntPair candidate = new IntPair(3, 0);
    int removedCount = 0;
    for (; findDuplicateState(candidate); removedCount++) {
        // System.out.printf("Removing duplicate states (%d, %d)\n", dupls.first, dupls.second);
        removeState(candidate);
    }
    return removedCount;
}
/**
* Calculate the size in bytes of the serialized form of this state transition table,
* which is identical to the ICU4C runtime form.
* Refer to common/rbbidata.h from ICU4C for the declarations of the structures
* being matched by this calculation.
*/
int getTableSize() {
    // No parse tree means no table at all.
    if (fRB.fTreeRoots[fRootIx] == null) {
        return 0;
    }

    final int headerSize = RBBIDataWrapper.RBBIStateTable.fHeaderSize;  // The header, with no rows.
    final int numRows = fDStates.size();
    final int numCols = fRB.fSetBuilder.getNumCharCategories();

    // Small tables store one byte per row entry, larger ones two.
    final int bytesPerEntry = (numRows <= MAX_STATE_FOR_8BITS_TABLE) ? 1 : 2;
    final int rowSize = bytesPerEntry * (RBBIDataWrapper.NEXTSTATES + numCols);

    final int unpadded = headerSize + numRows * rowSize;
    return (unpadded + 7) & ~7;   // round up to a multiple of 8 bytes
}
/**
* Create a RBBIDataWrapper.RBBIStateTable for a newly compiled table.
* RBBIDataWrapper.RBBIStateTable is similar to struct RBBIStateTable in ICU4C,
* in common/rbbidata.h
*/
RBBIDataWrapper.RBBIStateTable exportTable() {
int state;
int col;
RBBIDataWrapper.RBBIStateTable table = new RBBIDataWrapper.RBBIStateTable();
if (fRB.fTreeRoots[fRootIx] == null) {
return table;
}
Assert.assrt(fRB.fSetBuilder.getNumCharCategories() < 0x7fff &&
fDStates.size() < 0x7fff);
table.fNumStates = fDStates.size();
table.fDictCategoriesStart = fRB.fSetBuilder.getDictCategoriesStart();
table.fLookAheadResultsSize =
fLASlotsInUse == RBBIDataWrapper.ACCEPTING_UNCONDITIONAL ? 0 : fLASlotsInUse + 1;
boolean use8Bits = table.fNumStates <= MAX_STATE_FOR_8BITS_TABLE;
// Size of table size in shorts.
int rowLen = RBBIDataWrapper.NEXTSTATES + fRB.fSetBuilder.getNumCharCategories(); // Row Length in shorts.
int tableSize;
if (use8Bits) {
tableSize = (getTableSize() - RBBIDataWrapper.RBBIStateTable.fHeaderSize); // fTable length in bytes.
table.fTable = new char[tableSize];
table.fRowLen = rowLen; // Row length in bytes.
} else {
tableSize = (getTableSize() - RBBIDataWrapper.RBBIStateTable.fHeaderSize) / 2; // fTable length in shorts.
table.fTable = new char[tableSize];
table.fRowLen = rowLen * 2; // Row length in bytes.
}
if (fRB.fLookAheadHardBreak) {
table.fFlags |= RBBIDataWrapper.RBBI_LOOKAHEAD_HARD_BREAK;
}
if (fRB.fSetBuilder.sawBOF()) {
table.fFlags |= RBBIDataWrapper.RBBI_BOF_REQUIRED;
}
if (use8Bits) {
table.fFlags |= RBBIDataWrapper.RBBI_8BITS_ROWS;
}
int numCharCategories = fRB.fSetBuilder.getNumCharCategories();
for (state=0; state
// Row 0 is the stop state.
// Row 1 is the start state.
// Row 2 and beyond are other states, initially one per char class, but
// after initial construction, many of the states will be combined, compacting the table.)
// The String holds the nextState data only. The four leading fields of a row, fAccepting,
// fLookAhead, etc. are not needed for the safe table, and are omitted at this stage of building.
assert(fSafeTable == null);
fSafeTable = new ArrayList<>();
for (int row=0; row s) {
for (RBBINode n : s) {
RBBINode.printInt(n.fSerialNum, 8);
}
System.out.println();
}
//-----------------------------------------------------------------------------
//
// printStates Debug Function. Dump the fully constructed state transition table.
//
//-----------------------------------------------------------------------------
void printStates() {
int c; // input "character"
int n; // state number
System.out.print("state | i n p u t s y m b o l s \n");
System.out.print(" | Acc LA Tag");
for (c=0; c tbl = fRB.fRuleStatusVals;
System.out.print("index | tags \n");
System.out.print("-------------------\n");
while (nextRecord < tbl.size()) {
thisRecord = nextRecord;
nextRecord = thisRecord + tbl.get(thisRecord).intValue() + 1;
RBBINode.printInt(thisRecord, 7);
for (i=thisRecord+1; i