org.apache.xerces.impl.xs.models.XSDFACM Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.xerces.impl.xs.models;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Vector;
import org.apache.xerces.impl.Constants;
import org.apache.xerces.impl.dtd.models.CMNode;
import org.apache.xerces.impl.dtd.models.CMStateSet;
import org.apache.xerces.impl.xs.SchemaSymbols;
import org.apache.xerces.impl.xs.SubstitutionGroupHandler;
import org.apache.xerces.impl.xs.XMLSchemaException;
import org.apache.xerces.impl.xs.XSConstraints;
import org.apache.xerces.impl.xs.XSElementDecl;
import org.apache.xerces.impl.xs.XSElementDeclHelper;
import org.apache.xerces.impl.xs.XSModelGroupImpl;
import org.apache.xerces.impl.xs.XSOpenContentDecl;
import org.apache.xerces.impl.xs.XSParticleDecl;
import org.apache.xerces.impl.xs.XSWildcardDecl;
import org.apache.xerces.xni.QName;
/**
* DFAContentModel is the implementation of XSCMValidator that does
* all of the non-trivial element content validation. This class does
* the conversion from the regular expression to the DFA that
* it then uses in its validation algorithm.
*
* @xerces.internal
*
* @author Neil Graham, IBM
* @version $Id: XSDFACM.java 1367485 2012-07-31 10:55:40Z mukulg $
*/
public class XSDFACM
implements XSCMValidator, XS11CMRestriction.XS11CM {
//
// Constants
//
private static final boolean DEBUG = false;
// special strings
// debugging
/** Set to true to debug content model validation. */
private static final boolean DEBUG_VALIDATE_CONTENT = false;
//
// Data
//
/**
* This is the map of unique input symbol elements to indices into
* each state's per-input symbol transition table entry. This is part
* of the built DFA information that must be kept around to do the
* actual validation. Note that since either XSElementDecl or XSParticleDecl object
* can live here, we've got to use an Object.
*/
private XSElementDecl fElements[];
private XSWildcardDecl fWildcards[];
private int fNumElements, fNumTotal;
/**
* This is an array of booleans, one per state (there are
* fTransTableSize states in the DFA) that indicates whether that
* state is a final state.
*/
private boolean fFinalStateFlags[] = null;
/**
* The list of follow positions for each NFA position (i.e. for each
* non-epsilon leaf node.) This is only used during the building of
* the DFA, and is let go afterwards.
*/
private CMStateSet fFollowList[] = null;
/**
* This is the head node of our intermediate representation. It is
* only non-null during the building of the DFA (just so that it
* does not have to be passed all around.) Once the DFA is built,
* this is no longer required so it's nulled out.
*/
private CMNode fHeadNode = null;
/**
* The count of leaf nodes. This is an important number that sets some
* limits on the sizes of data structures in the DFA process.
*/
private int fLeafCount = 0;
/**
* An array of non-epsilon leaf nodes, which is used during the DFA
* build operation, then dropped.
*/
private XSCMLeaf fLeafList[] = null;
/** Array mapping ANY types to the leaf list. */
private int fLeafListType[] = null;
/**
* This is the transition table that is the main by-product of all
* of the effort here. It is an array of arrays of ints. The first
* dimension is the number of states we end up with in the DFA. The
* second dimension is the number of unique elements in the content
* model (fElemMapSize). Each entry in the second dimension indicates
* the new state given that input for the first dimension's start
* state.
*
* The fElemMap array handles mapping from element indexes to
* positions in the second dimension of the transition table.
*/
private int fTransTable[][] = null;
/**
* The open content model
*/
private final XSOpenContentDecl fOpenContent;
/**
* The XML Schema version
*/
private final short fSchemaVersion;
/**
* Array containing occurrence information for looping states
* which use counters to check minOccurs/maxOccurs.
*/
private Occurence [] fCountingStates = null;
/**
 * Occurrence bounds of a looping (counting) DFA state, together with the
 * index of the element-map entry the bounds were taken from.
 */
static final class Occurence {
    /** Lower bound, copied from the repeating leaf at construction time. */
    final int minOccurs;
    /** Upper bound, copied from the repeating leaf at construction time. */
    final int maxOccurs;
    /** Element-map index of the looping particle; deliberately not final. */
    int elemIndex;

    /**
     * @param leaf      the repeating leaf supplying minOccurs/maxOccurs
     * @param elemIndex the element-map index associated with that leaf
     */
    public Occurence (XSCMRepeatingLeaf leaf, int elemIndex) {
        this.minOccurs = leaf.getMinOccurs();
        this.maxOccurs = leaf.getMaxOccurs();
        this.elemIndex = elemIndex;
    }

    /** Renders the bounds, spelling an unbounded maximum as "unbounded". */
    public String toString() {
        final String upperBound;
        if (maxOccurs == SchemaSymbols.OCCURRENCE_UNBOUNDED) {
            upperBound = "unbounded";
        }
        else {
            upperBound = Integer.toString(maxOccurs);
        }
        return "minOccurs=" + minOccurs + ";maxOccurs=" + upperBound;
    }
}
/**
* The number of valid entries in the transition table, and in the other
* related tables such as fFinalStateFlags.
*/
private int fTransTableSize = 0;
private boolean fIsCompactedForUPA;
// temp variables
//
// Constructors
//
/**
* Constructs a DFA content model.
*
* @param syntaxTree The syntax tree of the content model.
* @param leafCount The number of leaves.
*
* @exception RuntimeException Thrown if DFA can't be built.
*/
public XSDFACM(CMNode syntaxTree, int leafCount, short schemaVersion, XSOpenContentDecl openContent) {
// Store away our index and pools in members
fLeafCount = leafCount;
fIsCompactedForUPA = syntaxTree.isCompactedForUPA();
// Store the XML Schema version
fSchemaVersion = schemaVersion;
// Store open content
fOpenContent = openContent;
//
// Create some string pool indexes that represent the names of some
// magical nodes in the syntax tree.
// (already done in static initialization...
//
//
// Ok, so lets grind through the building of the DFA. This method
// handles the high level logic of the algorithm, but it uses a
// number of helper classes to do its thing.
//
// In order to avoid having hundreds of references to the error and
// string handlers around, this guy and all of his helper classes
// just throw a simple exception and we then pass it along.
//
if(DEBUG_VALIDATE_CONTENT) {
XSDFACM.time -= System.currentTimeMillis();
}
buildDFA(syntaxTree);
if(DEBUG_VALIDATE_CONTENT) {
XSDFACM.time += System.currentTimeMillis();
System.out.println("DFA build: " + XSDFACM.time + "ms");
}
}
private static long time = 0;
//
// XSCMValidator methods
//
/**
 * Tells whether the given state is one of the final states.
 *
 * @param state the state to check; a negative value is never final
 *
 * @return whether it's a final state
 */
public boolean isFinalState (int state) {
    // short-circuit keeps negative states away from the flag array
    return state >= 0 && fFinalStateFlags[state];
}
/**
 * Takes exactly one transition for the given element.
 *
 * @param curElem The current element's QName
 * @param state stack to store the previous state
 * @param subGroupHandler the substitution group handler
 * @param eDeclHelper helper handed through to the wildcard checks
 *
 * @return null if transition is invalid; otherwise the Object corresponding to the
 *         XSElementDecl or XSWildcardDecl identified. When the match is the
 *         wildcard of the open content, the open content declaration itself is
 *         returned instead. Also, the state array will be modified to include
 *         the new state; this so that the validator can store it away.
 *
 * @exception RuntimeException thrown on error
 */
public Object oneTransition(QName curElem, int[] state, SubstitutionGroupHandler subGroupHandler, XSElementDeclHelper eDeclHelper) {
    final Object matched = oneTransition1(curElem, state, subGroupHandler, eDeclHelper);
    // Report a hit on the open content wildcard as the open content itself.
    if (fOpenContent != null && matched == fOpenContent.fWildcard) {
        return fOpenContent;
    }
    return matched;
}
/**
 * Does the work of {@link #oneTransition}: looks up a transition from the
 * current state for the given element and updates the state array.
 *
 * The state array is used as follows: [0] is the current state, [1] the
 * last valid state when [0] holds an error marker, and [2] the occurrence
 * counter for counting states.
 *
 * @param curElem The current element's QName
 * @param state the state array, updated in place
 * @param subGroupHandler the substitution group handler
 * @param eDeclHelper helper passed to allowExpandedName for XML Schema 1.1 wildcards
 *
 * @return the matching XSElementDecl or XSWildcardDecl. When no transition
 *         is possible the state is switched to an error marker and the result
 *         of the transition-independent findMatchingDecl lookup (possibly
 *         null) is returned.
 */
private Object oneTransition1(QName curElem, int[] state, SubstitutionGroupHandler subGroupHandler, XSElementDeclHelper eDeclHelper) {
int curState = state[0];
if(curState == XSCMValidator.FIRST_ERROR || curState == XSCMValidator.SUBSEQUENT_ERROR) {
// there was an error last time; so just go find correct Object in the element map.
// ... after resetting state[0].
if (curState == XSCMValidator.FIRST_ERROR) {
state[0] = XSCMValidator.SUBSEQUENT_ERROR;
}
return findMatchingDecl(curElem, subGroupHandler);
}
int nextState = 0;
int elemIndex = 0;
Object matchingDecl = null;
// First pass: element declarations, which occupy indices [0, fNumElements).
// Entries with no transition from the current state (-1) are skipped.
for (; elemIndex < fNumElements; elemIndex++) {
nextState = fTransTable[curState][elemIndex];
if (nextState == -1)
continue;
matchingDecl = subGroupHandler.getMatchingElemDecl(curElem, fElements[elemIndex], fSchemaVersion);
if (matchingDecl != null) {
break;
}
}
// Second pass: wildcards, which occupy indices [fNumElements, fNumTotal).
// elemIndex deliberately carries over from the first loop.
if (matchingDecl == null) {
for (; elemIndex < fNumTotal; elemIndex++) {
nextState = fTransTable[curState][elemIndex];
if (nextState == -1)
continue;
// XML Schema 1.0
if (fSchemaVersion < Constants.SCHEMA_VERSION_1_1) {
if (fWildcards[elemIndex].allowNamespace(curElem.uri)) {
matchingDecl = fWildcards[elemIndex];
break;
}
}
// XML Schema 1.1
else if (allowExpandedName(fWildcards[elemIndex], curElem, subGroupHandler, eDeclHelper)) {
matchingDecl = fWildcards[elemIndex];
break;
}
}
}
// if we still can't find a match, set the state to first_error
// and return whatever declaration matches regardless of transitions
// (which may be null)
if (matchingDecl == null) {
state[1] = state[0];
state[0] = XSCMValidator.FIRST_ERROR;
return findMatchingDecl(curElem, subGroupHandler);
}
// From here on state[0] already holds the new state; curState still
// holds the state we came from.
state[0] = nextState;
// No counting states in this model: nothing more to check.
if (fCountingStates == null) {
return matchingDecl;
}
// Interleave open content can appear anywhere, even while counting
if (fOpenContent != null && fOpenContent.fWildcard == matchingDecl &&
fOpenContent.fMode == XSOpenContentDecl.MODE_INTERLEAVE) {
return matchingDecl;
}
Occurence o = fCountingStates[curState];
if (o != null) {
if (curState == nextState) {
// Looping on the counting state: bump the counter and check the upper bound.
if (++state[2] > o.maxOccurs &&
o.maxOccurs != SchemaSymbols.OCCURRENCE_UNBOUNDED) {
// It's likely that we looped too many times on the current state
// however it's possible that we actually matched another particle
// which allows the same name.
//
// Consider:
//
// <xs:sequence>
//  <xs:element name="foo" type="xs:string" minOccurs="3" maxOccurs="3"/>
//  <xs:element name="foo" type="xs:string" fixed="bar"/>
// </xs:sequence>
//
// and
//
// <xs:sequence>
//  <xs:element name="foo" type="xs:string" minOccurs="3" maxOccurs="3"/>
//  <xs:any namespace="##any" processContents="skip"/>
// </xs:sequence>
//
// In the DFA there will be two transitions from the current state which
// allow "foo". Note that this is not a UPA violation. The ambiguity of which
// transition to take is resolved by the current value of the counter. Since
// we've already seen enough instances of the first "foo" perhaps there is
// another element declaration or wildcard deeper in the element map which
// matches.
return findMatchingDecl(curElem, state, subGroupHandler, ++elemIndex, eDeclHelper);
}
}
else if (state[2] < o.minOccurs) {
// not enough loops on the current state.
// NOTE(review): state[0] was already overwritten with nextState above,
// so state[1] records nextState here rather than curState - confirm intended.
state[1] = state[0];
state[0] = XSCMValidator.FIRST_ERROR;
return findMatchingDecl(curElem, subGroupHandler);
}
else {
// Exiting a counting state. If we're entering a new
// counting state, reset the counter.
o = fCountingStates[nextState];
if (o != null) {
state[2] = (elemIndex == o.elemIndex) ? 1 : 0;
}
}
}
else {
o = fCountingStates[nextState];
if (o != null) {
// Entering a new counting state. Reset the counter.
// If we've already seen one instance of the looping
// particle set the counter to 1, otherwise set it
// to 0.
state[2] = (elemIndex == o.elemIndex) ? 1 : 0;
}
}
return matchingDecl;
} // oneTransition1(QName, int[], SubstitutionGroupHandler, XSElementDeclHelper): Object
/**
 * Finds any declaration of this content model that matches the given
 * name, without consulting the transition table. Element declarations
 * are tried first, then wildcards.
 *
 * @param curElem the element name to match
 * @param subGroupHandler the substitution group handler
 *
 * @return the matching XSElementDecl or XSWildcardDecl, or null if none matches
 */
Object findMatchingDecl(QName curElem, SubstitutionGroupHandler subGroupHandler) {
    // element declarations live in [0, fNumElements)
    int index = 0;
    while (index < fNumElements) {
        final Object elemDecl = subGroupHandler.getMatchingElemDecl(curElem, fElements[index], fSchemaVersion);
        if (elemDecl != null) {
            return elemDecl;
        }
        index++;
    }
    // wildcards live in [fNumElements, fNumTotal)
    for (index = fNumElements; index < fNumTotal; index++) {
        final XSWildcardDecl wildcard = fWildcards[index];
        if (wildcard.allowQName(curElem)) {
            return wildcard;
        }
    }
    return null;
} // findMatchingDecl(QName, SubstitutionGroupHandler): Object
/**
 * Continues the search for a transition from the current state, starting
 * at the given element-map index. This is used when a counting state has
 * exceeded its maxOccurs and another particle further along the element
 * map may still accept the same name.
 *
 * Only entries that actually have a transition from the current state
 * are considered, for element declarations and wildcards alike, so that
 * the state stored in <code>state[0]</code> is always the target of the
 * entry that matched.
 *
 * @param curElem the element name to match
 * @param state the state array; [0] is read as the current state and
 *              updated with the new state (or an error marker), [1]
 *              receives the previous state on error, [2] is reset when a
 *              counting state is entered
 * @param subGroupHandler the substitution group handler
 * @param elemIndex the first element-map index to consider
 * @param eDeclHelper helper passed to allowExpandedName for XML Schema 1.1 wildcards
 *
 * @return the matching declaration; if no transition exists the state is
 *         set to an error marker and the transition-independent lookup
 *         result (possibly null) is returned
 */
Object findMatchingDecl(QName curElem, int[] state, SubstitutionGroupHandler subGroupHandler, int elemIndex, XSElementDeclHelper eDeclHelper) {
    final int curState = state[0];
    int nextState = 0;
    Object matchingDecl = null;

    // element declarations: indices [0, fNumElements)
    for (; elemIndex < fNumElements; elemIndex++) {
        nextState = fTransTable[curState][elemIndex];
        if (nextState == -1) {
            continue;
        }
        matchingDecl = subGroupHandler.getMatchingElemDecl(curElem, fElements[elemIndex], fSchemaVersion);
        if (matchingDecl != null) {
            break;
        }
    }

    // wildcards: indices [fNumElements, fNumTotal)
    if (matchingDecl == null) {
        for (; elemIndex < fNumTotal; elemIndex++) {
            // A wildcard is only a candidate if the current state has a
            // transition on it; this also keeps nextState in step with
            // the entry being examined.
            nextState = fTransTable[curState][elemIndex];
            if (nextState == -1) {
                continue;
            }
            final XSWildcardDecl wildcard = fWildcards[elemIndex];
            final boolean allowed;
            if (fSchemaVersion < Constants.SCHEMA_VERSION_1_1) {
                // XML Schema 1.0
                allowed = wildcard.allowNamespace(curElem.uri);
            }
            else {
                // XML Schema 1.1
                allowed = allowExpandedName(wildcard, curElem, subGroupHandler, eDeclHelper);
            }
            if (allowed) {
                matchingDecl = wildcard;
                break;
            }
        }
    }

    // if we still can't find a match, set the state to first_error
    // and fall back to the transition-independent lookup
    if (matchingDecl == null) {
        state[1] = state[0];
        state[0] = XSCMValidator.FIRST_ERROR;
        return findMatchingDecl(curElem, subGroupHandler);
    }

    // if we found a match, set the next state and reset the
    // counter if the next state is a counting state.
    state[0] = nextState;
    final Occurence o = fCountingStates[nextState];
    if (o != null) {
        state[2] = (elemIndex == o.elemIndex) ? 1 : 0;
    }
    return matchingDecl;
} // findMatchingDecl(QName, int[], SubstitutionGroupHandler, int, XSElementDeclHelper): Object
/**
 * Finds the first element declaration of this content model that matches
 * the given name; wildcards are not considered.
 *
 * @param curElem the element name to match
 * @param subGroupHandler the substitution group handler
 *
 * @return the matching element declaration, or null if there is none
 */
public XSElementDecl findMatchingElemDecl(QName curElem, SubstitutionGroupHandler subGroupHandler) {
    int index = 0;
    while (index < fNumElements) {
        final XSElementDecl candidate =
            subGroupHandler.getMatchingElemDecl(curElem, fElements[index], fSchemaVersion);
        if (candidate != null) {
            return candidate;
        }
        index++;
    }
    return null;
} // findMatchingElemDecl(QName, SubstitutionGroupHandler): XSElementDecl
/**
 * Checks whether the wildcard accepts the given expanded name, taking
 * the wildcard's fDisallowedSibling and fDisallowedDefined flags into
 * account.
 *
 * @param wildcard the wildcard to test
 * @param curElem the element name to match
 * @param subGroupHandler the substitution group handler
 * @param eDeclHelper used to look up global element declarations
 *
 * @return true if the name is allowed by the wildcard
 */
public boolean allowExpandedName(XSWildcardDecl wildcard, QName curElem, SubstitutionGroupHandler subGroupHandler, XSElementDeclHelper eDeclHelper) {
    // The name must pass the wildcard's own QName test first.
    if (!wildcard.allowQName(curElem)) {
        return false;
    }
    // Rejected if siblings are disallowed and an element declaration of
    // this content model matches the name.
    final boolean clashesWithSibling =
        wildcard.fDisallowedSibling && findMatchingElemDecl(curElem, subGroupHandler) != null;
    if (clashesWithSibling) {
        return false;
    }
    // Rejected if defined names are disallowed and a global element
    // declaration exists for the name.
    final boolean clashesWithGlobal =
        wildcard.fDisallowedDefined && eDeclHelper.getGlobalElementDecl(curElem) != null;
    return !clashesWithGlobal;
}
/**
 * Returns the start state of the content model as a fresh, zero-filled
 * state array:
 * <ul>
 *  <li>[0] : the current state</li>
 *  <li>[1] : if [0] is an error state then the
 *            last valid state before the error</li>
 *  <li>[2] : occurence counter for counting states</li>
 * </ul>
 */
public int[] startContentModel() {
    final int[] initialState = new int [3];
    return initialState;
} // startContentModel():int[]
/**
 * Tells whether the content model may end in the given state.
 *
 * A state is acceptable if it is flagged final and, when it is a counting
 * state, the occurrence counter has reached its minOccurs. A negative
 * current state (as written into the state array after a failed
 * transition) is never a valid end state; it is rejected up front rather
 * than being used as an array index, in line with isFinalState(int).
 *
 * @param state the state array; [0] is the current state and [2] the
 *              occurrence counter
 *
 * @return whether the last state was a valid final state
 */
public boolean endContentModel(int[] state) {
    final int curState = state[0];
    if (curState < 0 || !fFinalStateFlags[curState]) {
        return false;
    }
    if (fCountingStates != null) {
        final Occurence o = fCountingStates[curState];
        if (o != null && state[2] < o.minOccurs) {
            // not enough loops on the current state to be considered final.
            return false;
        }
    }
    return true;
} // endContentModel(int[]): boolean
// Killed off whatCanGoHere; we may need it for DOM canInsert(...) etc.,
// but we can put it back later.
//
// Private methods
//
/**
 * Builds the internal DFA transition table from the given syntax tree.
 *
 * The steps are: append an end-of-content (EOC) leaf to the tree, compute
 * the follow position sets, build the map of unique input symbols
 * (elements first, wildcards after), run the subset construction to fill
 * the transition table, record occurrence information for looping states,
 * and finally add the open content column (and, for non-interleave open
 * content, one extra accepting state).
 *
 * @param syntaxTree The syntax tree.
 *
 * @exception RuntimeException Thrown if DFA cannot be built.
 */
private void buildDFA(CMNode syntaxTree) {
    //
    // The syntax tree handed in uses the following rules for repetitions
    // (MODIFIED Jan, 2001):
    //
    //   nullable(x+) := nullable(x), first(x+) := first(x), last(x+) := last(x)
    //   nullable(x?) := true,        first(x?) := first(x), last(x?) := last(x)
    //
    // The same computation of follow as x* is applied to x+. This
    // drastically reduces computation time of
    // "(a, (b, a+, (c, (b, a+)+, a+, (d, (c, (b, a+)+, a+)+, (b, a+)+, a+)+)+)+)+"
    //
    // Here we create a top level sequence node whose left side is the
    // syntax tree and whose right side is the special EOC leaf. The EOC
    // leaf must be numbered and counted as a non-epsilon leaf as well, and
    // we save its position since it is used during the DFA building loop
    // to recognise final states.
    //
    int EOCPos = fLeafCount;
    XSCMLeaf nodeEOC = new XSCMLeaf(XSParticleDecl.PARTICLE_ELEMENT, null, -1, fLeafCount++);
    fHeadNode = new XSCMBinOp(
        XSModelGroupImpl.MODELGROUP_SEQUENCE,
        syntaxTree,
        nodeEOC
    );

    //
    // Now that the leaf count is known, iterate the tree: the first and
    // last position sets of each node are calculated and cached in the
    // nodes, and the arrays of non-epsilon leaf nodes (and their types)
    // are filled in, indexed by leaf position.
    //
    fLeafList = new XSCMLeaf[fLeafCount];
    fLeafListType = new int[fLeafCount];
    postTreeBuildInit(fHeadNode);

    //
    // Build the follow position sets: one state set per leaf node
    // (i.e. per DFA position).
    //
    fFollowList = new CMStateSet[fLeafCount];
    for (int index = 0; index < fLeafCount; index++)
        fFollowList[index] = new CMStateSet(fLeafCount);
    calcFollowList(fHeadNode);

    //
    // Build the element map: the array of unique input symbols of the
    // content model. Each transition table entry needs a zero based,
    // contiguous range of indices, one per input symbol; this map
    // provides that mapping. Leaves are considered the same symbol when
    // they share a particle id.
    //
    Object[] fElemMap = new Object[fLeafCount];
    int[] fElemMapType = new int[fLeafCount];
    int[] fElemMapId = new int[fLeafCount];
    int fElemMapSize = 0;
    Occurence [] elemOccurenceMap = null;
    int numElem = 0;
    for (int outIndex = 0; outIndex < fLeafCount; outIndex++) {
        // optimization from Henry Zongaro:
        //fElemMap[outIndex] = new Object ();
        fElemMap[outIndex] = null;
        int inIndex = 0;
        final int id = fLeafList[outIndex].getParticleId();
        for (; inIndex < fElemMapSize; inIndex++) {
            if (id == fElemMapId[inIndex])
                break;
        }
        // If it was not in the list, then add it
        if (inIndex == fElemMapSize) {
            XSCMLeaf leaf = fLeafList[outIndex];
            fElemMap[fElemMapSize] = leaf.getLeaf();
            if (leaf instanceof XSCMRepeatingLeaf) {
                // occurrence information is only kept when at least one
                // repeating leaf exists
                if (elemOccurenceMap == null) {
                    elemOccurenceMap = new Occurence[fLeafCount];
                }
                elemOccurenceMap[fElemMapSize] = new Occurence((XSCMRepeatingLeaf) leaf, fElemMapSize);
            }
            fElemMapType[fElemMapSize] = fLeafListType[outIndex];
            fElemMapId[fElemMapSize] = id;
            if (fElemMapType[fElemMapSize] == XSParticleDecl.PARTICLE_ELEMENT) {
                numElem++;
            }
            fElemMapSize++;
        }
    }

    // the last entry in the element map must be the EOC element.
    // remove it from the map.
    if (DEBUG) {
        if (fElemMapId[fElemMapSize-1] != -1)
            System.err.println("interal error in DFA: last element is not EOC.");
    }
    fElemMapSize--;
    numElem--;

    // The open content wildcard, if any, gets one extra column after the
    // regular symbols. fNumTotal must be final before the first call to
    // makeDefStateList(), which sizes each transition row with it.
    fNumTotal = fElemMapSize;
    if (fOpenContent != null) {
        fNumTotal++;
    }

    // Partition the element map so that elements appear before wildcards;
    // this simplifies oneTransition(), because of weakened wc.
    // fElemMapType is not swapped: both cursors move past the swapped
    // slots and the types are not consulted again afterwards.
    for (int ep=0, wp=fElemMapSize-1;;) {
        while (ep <= wp && fElemMapType[ep] == XSParticleDecl.PARTICLE_ELEMENT) {
            ep++;
        }
        while (wp >= ep && fElemMapType[wp] == XSParticleDecl.PARTICLE_WILDCARD) {
            wp--;
        }
        if (ep < wp) {
            Object t1 = fElemMap[ep];
            fElemMap[ep] = fElemMap[wp];
            fElemMap[wp] = t1;
            int t2 = fElemMapId[ep];
            fElemMapId[ep] = fElemMapId[wp];
            fElemMapId[wp] = t2;
            if (elemOccurenceMap != null) {
                Occurence t3 = elemOccurenceMap[ep];
                elemOccurenceMap[ep] = elemOccurenceMap[wp];
                elemOccurenceMap[wp] = t3;
                // keep each occurrence pointing at its new column
                if (elemOccurenceMap[ep] != null) {
                    elemOccurenceMap[ep].elemIndex = ep;
                }
                if (elemOccurenceMap[wp] != null) {
                    elemOccurenceMap[wp].elemIndex = wp;
                }
            }
            ep++;
            wp--;
        }
        else {
            break;
        }
    }

    /***
     * Optimization(Jan, 2001); We sort fLeafList according to
     * elemIndex which is *uniquely* associated to each leaf.
     * We are *assuming* that each element appears in at least one leaf.
     *
     * fLeafSorter holds, for each element-map entry in order, the leaf
     * indices carrying that entry's particle id, terminated by -1.
     **/
    int[] fLeafSorter = new int[fLeafCount + fElemMapSize];
    int fSortCount = 0;
    for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) {
        final int id = fElemMapId[elemIndex];
        for (int leafIndex = 0; leafIndex < fLeafCount; leafIndex++) {
            if (id == fLeafList[leafIndex].getParticleId())
                fLeafSorter[fSortCount++] = leafIndex;
        }
        fLeafSorter[fSortCount++] = -1;
    }
    /* Optimization(Jan, 2001) */

    //
    // Next lets create some arrays, some that hold transient
    // information during the DFA build and some that are permanent.
    // We cannot know how big they will get, so we start with
    // fLeafCount*4 entries and grow by 50% whenever they fill up.
    //
    int curArraySize = fLeafCount * 4;
    CMStateSet[] statesToDo = new CMStateSet[curArraySize];
    fFinalStateFlags = new boolean[curArraySize];
    fTransTable = new int[curArraySize][];

    //
    // Ok we start with the initial set as the first pos set of the
    // head node (which is the seq node that holds the content model
    // and the EOC node.)
    //
    CMStateSet setT = fHeadNode.firstPos();

    //
    // Init our two state counters. The unmarked state counter is always
    // chasing the current state counter. When it catches up, a pass was
    // made that did not add any new states, at which time we are done.
    //
    int unmarkedState = 0;
    int curState = 0;

    //
    // Init the first transition table entry, and put the initial state
    // into the states to do list, then bump the current state.
    //
    fTransTable[curState] = makeDefStateList();
    statesToDo[curState] = setT;
    curState++;

    /* Optimization(Jan, 2001); This is faster for
     * a large content model such as, "(t001+|t002+|.... |t500+)".
     * Maps a state set to the index of the DFA state created for it.
     */
    HashMap stateTable = new HashMap();
    /* Optimization(Jan, 2001) */

    //
    // Main loop: go until the states done counter catches up with
    // the states to do counter.
    //
    while (unmarkedState < curState) {
        //
        // Get the first unmarked state out of the list of states to do.
        // And get the associated transition table entry.
        //
        setT = statesToDo[unmarkedState];
        int[] transEntry = fTransTable[unmarkedState];

        // Mark this one final if it contains the EOC state
        fFinalStateFlags[unmarkedState] = setT.getBit(EOCPos);

        // Bump up the unmarked state count, marking this state done
        unmarkedState++;

        // Loop through each possible input symbol in the element map
        CMStateSet newSet = null;
        /* Optimization(Jan, 2001) */
        int sorterIndex = 0;
        /* Optimization(Jan, 2001) */
        for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) {
            //
            // Build up a set of states which is the union of all of
            // the follow sets of DFA positions that are in the current
            // state. If we gave away the new set last time through then
            // create a new one. Otherwise, zero out the existing one.
            //
            if (newSet == null)
                newSet = new CMStateSet(fLeafCount);
            else
                newSet.zeroBits();

            /* Optimization(Jan, 2001) */
            // Walk the leaves that carry the current input symbol.
            int leafIndex = fLeafSorter[sorterIndex++];
            while (leafIndex != -1) {
                // If this leaf index (DFA position) is in the current set...
                if (setT.getBit(leafIndex)) {
                    //
                    // ... then add its follow list to the set of states
                    // to transition to from the current state.
                    //
                    newSet.union(fFollowList[leafIndex]);
                }
                leafIndex = fLeafSorter[sorterIndex++];
            }
            /* Optimization(Jan, 2001) */

            //
            // If this new set is not empty, then see if its in the list
            // of states to do. If not, then add it.
            //
            if (!newSet.isEmpty()) {
                //
                // Look up whether a state already exists for this set.
                //
                /* Optimization(Jan, 2001) */
                Integer stateObj = (Integer)stateTable.get(newSet);
                int stateIndex = (stateObj == null ? curState : stateObj.intValue());
                /* Optimization(Jan, 2001) */

                // If we did not find it, then add it
                if (stateIndex == curState) {
                    //
                    // Put this new state into the states to do and init
                    // a new entry at the same index in the transition
                    // table.
                    //
                    statesToDo[curState] = newSet;
                    fTransTable[curState] = makeDefStateList();
                    /* Optimization(Jan, 2001) */
                    stateTable.put(newSet, new Integer(curState));
                    /* Optimization(Jan, 2001) */

                    // We now have a new state to do so bump the count
                    curState++;

                    //
                    // Null out the new set to indicate we adopted it.
                    // This will cause the creation of a new set on the
                    // next time around the loop.
                    //
                    newSet = null;
                }

                //
                // Now set this state in the transition table's entry
                // for this element (using its index), with the DFA
                // state we will move to from the current state when we
                // see this input element.
                //
                transEntry[elemIndex] = stateIndex;

                // Expand the arrays if we're full. Doing this right after
                // a state may have been added guarantees that at least
                // one free slot remains behind the last state.
                if (curState == curArraySize) {
                    //
                    // We overflowed the current array size, so adjust
                    // up the size by 50% and allocate new arrays.
                    //
                    final int newSize = (int)(curArraySize * 1.5);
                    CMStateSet[] newToDo = new CMStateSet[newSize];
                    boolean[] newFinalFlags = new boolean[newSize];
                    int[][] newTransTable = new int[newSize][];

                    // Copy over all of the existing content
                    System.arraycopy(statesToDo, 0, newToDo, 0, curArraySize);
                    System.arraycopy(fFinalStateFlags, 0, newFinalFlags, 0, curArraySize);
                    System.arraycopy(fTransTable, 0, newTransTable, 0, curArraySize);

                    // Store the new array size
                    curArraySize = newSize;
                    statesToDo = newToDo;
                    fFinalStateFlags = newFinalFlags;
                    fTransTable = newTransTable;
                }
            }
        }
    }

    //
    // Fill in the occurence information for each looping state
    // if we're using counters.
    //
    if (elemOccurenceMap != null) {
        // Open content that is not interleaved appends one more state
        // (index curState) further down. fCountingStates is indexed by
        // state, so it must have room for that state too; its slot stays
        // null, i.e. the appended state is not a counting state.
        final boolean appendsOpenContentState =
            fOpenContent != null && fOpenContent.fMode != XSOpenContentDecl.MODE_INTERLEAVE;
        fCountingStates = new Occurence[appendsOpenContentState ? curState + 1 : curState];
        for (int i = 0; i < curState; ++i) {
            int [] transitions = fTransTable[i];
            for (int j = 0; j < transitions.length; ++j) {
                // a self transition marks a looping state
                if (i == transitions[j]) {
                    fCountingStates[i] = elemOccurenceMap[j];
                    break;
                }
            }
        }
    }
    fTransTableSize = curState;

    //
    // And now we can say bye bye to the temp representation since we've
    // built the DFA.
    //
    if (DEBUG_VALIDATE_CONTENT)
        dumpTree(fHeadNode, 0);
    fHeadNode = null;
    fLeafList = null;
    fFollowList = null;
    fLeafListType = null;
    fElemMapId = null;

    // If there is open content, massage the transition table
    if (fOpenContent != null) {
        // the open content wildcard takes the column after the regular symbols
        fElemMap[fElemMapSize] = fOpenContent.fWildcard;
        if (fOpenContent.fMode == XSOpenContentDecl.MODE_INTERLEAVE) {
            // interleave: the wildcard loops on every state
            for (int i = 0; i < fTransTableSize; i++) {
                fTransTable[i][fElemMapSize] = i;
            }
        }
        else {
            // otherwise: every final state may move on the wildcard to
            // one additional final state, which loops on the wildcard
            for (int i = 0; i < fTransTableSize; i++) {
                if (fFinalStateFlags[i]) {
                    fTransTable[i][fElemMapSize] = fTransTableSize;
                }
            }
            // There is at least one empty spot: the arrays are expanded
            // as soon as they become full (see the main loop above).
            fTransTable[fTransTableSize] = makeDefStateList();
            fTransTable[fTransTableSize][fElemMapSize] = fTransTableSize;
            fFinalStateFlags[fTransTableSize] = true;
            fTransTableSize++;
        }
        fElemMapSize++;
    }

    // Finally split the element map into the typed arrays used during
    // validation. Both arrays are indexed by element-map index, so the
    // wildcard array leaves its first numElem slots unused.
    fNumElements = numElem;
    if (numElem > 0) {
        fElements = new XSElementDecl[numElem];
    }
    if (fNumTotal > numElem) {
        fWildcards = new XSWildcardDecl[fNumTotal];
    }
    for (int i = 0; i < numElem; i++) {
        fElements[i] = (XSElementDecl)fElemMap[i];
    }
    for (int i = numElem; i < fNumTotal; i++) {
        fWildcards[i] = (XSWildcardDecl)fElemMap[i];
    }
}
/**
 * Calculates the follow list of the given node and, recursively, of
 * everything below it. Results are accumulated into fFollowList.
 *
 * @param nodeCur The current node.
 *
 * @exception RuntimeException Thrown if follow list cannot be calculated.
 */
private void calcFollowList(CMNode nodeCur) {
    final int nodeType = nodeCur.type();
    switch (nodeType) {
        case XSModelGroupImpl.MODELGROUP_CHOICE: {
            // A choice contributes nothing itself; just descend.
            final XSCMBinOp choice = (XSCMBinOp)nodeCur;
            calcFollowList(choice.getLeft());
            calcFollowList(choice.getRight());
            break;
        }
        case XSModelGroupImpl.MODELGROUP_SEQUENCE: {
            // Descend first ...
            final XSCMBinOp sequence = (XSCMBinOp)nodeCur;
            calcFollowList(sequence.getLeft());
            calcFollowList(sequence.getRight());

            // ... then, at this level, every position in the left child's
            // last set is followed by everything in the right child's
            // first set.
            final CMStateSet leftLast = sequence.getLeft().lastPos();
            final CMStateSet rightFirst = sequence.getRight().firstPos();
            for (int pos = 0; pos < fLeafCount; pos++) {
                if (leftLast.getBit(pos)) {
                    fFollowList[pos].union(rightFirst);
                }
            }
            break;
        }
        case XSParticleDecl.PARTICLE_ZERO_OR_MORE:
        case XSParticleDecl.PARTICLE_ONE_OR_MORE: {
            // Descend first ...
            calcFollowList(((XSCMUniOp)nodeCur).getChild());

            // ... then, at this level, every position in our own last set
            // is followed by everything in our own first set (the loop).
            final CMStateSet ownFirst = nodeCur.firstPos();
            final CMStateSet ownLast = nodeCur.lastPos();
            for (int pos = 0; pos < fLeafCount; pos++) {
                if (ownLast.getBit(pos)) {
                    fFollowList[pos].union(ownFirst);
                }
            }
            break;
        }
        case XSParticleDecl.PARTICLE_ZERO_OR_ONE: {
            // An optional node contributes nothing itself; just descend.
            calcFollowList(((XSCMUniOp)nodeCur).getChild());
            break;
        }
        default:
            // leaves and any other node types: nothing to do
            break;
    }
}
/**
 * Dumps the tree rooted at the given node to standard output, one node
 * per line, children after their parent.
 *
 * @param nodeCur The current node.
 * @param level The nesting depth of the node; one indentation unit is
 *              printed per level.
 *
 * @exception RuntimeException Thrown when a node of an unknown type is met.
 */
private void dumpTree(CMNode nodeCur, int level) {
    // indentation
    int pending = level;
    while (pending > 0) {
        System.out.print(" ");
        pending--;
    }

    final int type = nodeCur.type();
    switch (type) {
        case XSModelGroupImpl.MODELGROUP_CHOICE:
        case XSModelGroupImpl.MODELGROUP_SEQUENCE: {
            // binary nodes: label, nullability, position sets, then both children
            System.out.print(type == XSModelGroupImpl.MODELGROUP_CHOICE ? "Choice Node " : "Seq Node ");
            if (nodeCur.isNullable()) {
                System.out.print("Nullable ");
            }
            System.out.print("firstPos=");
            System.out.print(nodeCur.firstPos().toString());
            System.out.print(" lastPos=");
            System.out.println(nodeCur.lastPos().toString());

            final XSCMBinOp binary = (XSCMBinOp)nodeCur;
            dumpTree(binary.getLeft(), level+1);
            dumpTree(binary.getRight(), level+1);
            break;
        }
        case XSParticleDecl.PARTICLE_ZERO_OR_MORE:
        case XSParticleDecl.PARTICLE_ONE_OR_MORE:
        case XSParticleDecl.PARTICLE_ZERO_OR_ONE: {
            // repetition nodes: label, nullability, position sets, then the child
            System.out.print("Rep Node ");
            if (nodeCur.isNullable()) {
                System.out.print("Nullable ");
            }
            System.out.print("firstPos=");
            System.out.print(nodeCur.firstPos().toString());
            System.out.print(" lastPos=");
            System.out.println(nodeCur.lastPos().toString());

            final XSCMUniOp unary = (XSCMUniOp)nodeCur;
            dumpTree(unary.getChild(), level+1);
            break;
        }
        case XSParticleDecl.PARTICLE_ELEMENT: {
            // element leaves: position and leaf object, nullability, position sets
            final XSCMLeaf leaf = (XSCMLeaf)nodeCur;
            final String header =
                "Leaf: (pos="
                + leaf.getPosition()
                + "), "
                + "(elemIndex="
                + leaf.getLeaf()
                + ") ";
            System.out.print(header);
            if (nodeCur.isNullable()) {
                System.out.print(" Nullable ");
            }
            System.out.print("firstPos=");
            System.out.print(nodeCur.firstPos().toString());
            System.out.print(" lastPos=");
            System.out.println(nodeCur.lastPos().toString());
            break;
        }
        case XSParticleDecl.PARTICLE_WILDCARD: {
            // wildcard leaves: position sets only
            System.out.print("Any Node: ");
            System.out.print("firstPos=");
            System.out.print(nodeCur.firstPos().toString());
            System.out.print(" lastPos=");
            System.out.println(nodeCur.lastPos().toString());
            break;
        }
        default:
            throw new RuntimeException("ImplementationMessages.VAL_NIICM");
    }
}
/**
 * Creates a new transition table entry for one state. The entry has
 * <code>fNumTotal</code> slots, every one of which is set to -1, the
 * value used to represent a bad (absent) transition.
 *
 * @return a newly allocated array of length <code>fNumTotal</code>
 *         filled with -1
 */
private int[] makeDefStateList()
{
    final int[] entry = new int[fNumTotal];
    java.util.Arrays.fill(entry, -1);
    return entry;
}
/**
 * Post tree build initialization. Walks the syntax tree, sets the
 * maximum number of states (<code>fLeafCount</code>) on every node and
 * records each leaf, together with its type, in
 * <code>fLeafList</code> / <code>fLeafListType</code> at the leaf's
 * position.
 *
 * @param nodeCur the root of the (sub)tree to initialize
 *
 * @exception RuntimeException Thrown if a node has an unrecognized type.
 */
private void postTreeBuildInit(CMNode nodeCur) throws RuntimeException {
    // Every node needs to know the size of its position sets.
    nodeCur.setMaxStates(fLeafCount);

    switch (nodeCur.type()) {
        case XSParticleDecl.PARTICLE_WILDCARD: {
            // Wildcard leaf: register it at its own position.
            final XSCMLeaf wildcardLeaf = (XSCMLeaf)nodeCur;
            final int slot = wildcardLeaf.getPosition();
            fLeafList[slot] = wildcardLeaf;
            fLeafListType[slot] = XSParticleDecl.PARTICLE_WILDCARD;
            break;
        }
        case XSModelGroupImpl.MODELGROUP_CHOICE:
        case XSModelGroupImpl.MODELGROUP_SEQUENCE: {
            // Binary operator: initialize both operands, left first.
            final XSCMBinOp binOp = (XSCMBinOp)nodeCur;
            postTreeBuildInit(binOp.getLeft());
            postTreeBuildInit(binOp.getRight());
            break;
        }
        case XSParticleDecl.PARTICLE_ZERO_OR_MORE:
        case XSParticleDecl.PARTICLE_ONE_OR_MORE:
        case XSParticleDecl.PARTICLE_ZERO_OR_ONE: {
            // Unary operator: initialize the single operand.
            postTreeBuildInit(((XSCMUniOp)nodeCur).getChild());
            break;
        }
        case XSParticleDecl.PARTICLE_ELEMENT: {
            // Element leaf: register it at its own position.
            final XSCMLeaf elementLeaf = (XSCMLeaf)nodeCur;
            final int slot = elementLeaf.getPosition();
            fLeafList[slot] = elementLeaf;
            fLeafListType[slot] = XSParticleDecl.PARTICLE_ELEMENT;
            break;
        }
        default:
            throw new RuntimeException("ImplementationMessages.VAL_NIICM");
    }
}
/**
 * Checks whether this content model violates the Unique Particle
 * Attribution (UPA) constraint.
 *
 * For every state, each pair of terms that both have a transition out
 * of that state is tested with
 * <code>XSConstraints.overlapUPA</code>. The verdict for a pair is
 * computed at most once and then remembered.
 *
 * @param subGroupHandler the substitution group handler
 * @param xsConstraints   the XML Schema Constraint checker
 * @return true if one of the wildcards of this content model has a
 *         namespace constraint of type <code>NSCONSTRAINT_LIST</code>
 *         or <code>NSCONSTRAINT_NOT</code>; false otherwise
 * @exception XMLSchemaException Thrown with key "cos-nonambig" for the
 *            first pair of terms found to be in conflict.
 */
public boolean checkUniqueParticleAttribution(SubstitutionGroupHandler subGroupHandler, XSConstraints xsConstraints) throws XMLSchemaException {
    // When there is an open content, the last term is left out.
    final int termCount = (fOpenContent == null) ? fNumTotal : fNumTotal - 1;

    // Pairwise verdicts, only filled in for first < second:
    //  0: not compared yet; -1: no conflict; 1: conflict
    final byte[][] verdicts = new byte[termCount][termCount];

    // For each state, look for pairs of terms that both leave it.
    for (int state = 0; state < fTransTableSize; state++) {
        for (int first = 0; first < termCount; first++) {
            for (int second = first + 1; second < termCount; second++) {
                if (fTransTable[state][first] == -1
                    || fTransTable[state][second] == -1) {
                    // At most one of the two is possible from here.
                    continue;
                }
                if (verdicts[first][second] != 0) {
                    // This pair already has a verdict.
                    continue;
                }

                final Object firstTerm = first < fNumElements
                    ? (Object)fElements[first] : (Object)fWildcards[first];
                final Object secondTerm = second < fNumElements
                    ? (Object)fElements[second] : (Object)fWildcards[second];

                if (!xsConstraints.overlapUPA(firstTerm, secondTerm, subGroupHandler)) {
                    verdicts[first][second] = (byte) -1;
                    continue;
                }

                // The terms overlap. If "state" is a counting state and
                // exactly one of the two transitions loops back to it,
                // the particles still do not conflict provided that
                // minOccurs == maxOccurs.
                boolean exempt = false;
                if (fCountingStates != null) {
                    final Occurence occ = fCountingStates[state];
                    exempt = occ != null
                        && ((fTransTable[state][first] == state)
                            != (fTransTable[state][second] == state))
                        && occ.minOccurs == occ.maxOccurs;
                }
                verdicts[first][second] = exempt ? (byte) -1 : (byte) 1;
            }
        }
    }

    // Report errors.
    // REVISIT: do we want to report all errors? or just one?
    for (int first = 0; first < termCount; first++) {
        for (int second = 0; second < termCount; second++) {
            if (verdicts[first][second] != 1) {
                continue;
            }
            throw new XMLSchemaException("cos-nonambig", new Object[]{
                first < fNumElements ? (Object)fElements[first] : (Object)fWildcards[first],
                second < fNumElements ? (Object)fElements[second] : (Object)fWildcards[second]});
        }
    }

    // If there is an "other" or "list" wildcard, this content model
    // needs to be checked again if the grammar is cached.
    for (int w = fNumElements; w < termCount; w++) {
        final XSWildcardDecl wildcard = fWildcards[w];
        final boolean isList = wildcard.fType == XSWildcardDecl.NSCONSTRAINT_LIST;
        if (isList || wildcard.fType == XSWildcardDecl.NSCONSTRAINT_NOT) {
            return true;
        }
    }
    return false;
}
/**
 * Determines which terms may appear next, given a validation state.
 * When <code>state[0]</code> is negative (the state is in error),
 * <code>state[1]</code> is used as the current state instead, so the
 * result describes what should have been seen.
 *
 * For a counting state, <code>state[2]</code> is taken as the number
 * of occurrences seen so far: looping transitions are left out once a
 * bounded maxOccurs has been reached, and transitions leaving the state
 * are left out while minOccurs has not been reached.
 *
 * @param state the current state
 * @return a Vector whose entries are instances of
 *         either XSWildcardDecl or XSElementDecl.
 */
public Vector whatCanGoHere(int[] state) {
    // When there is an open content, the last term is left out.
    final int termCount = (fOpenContent == null) ? fNumTotal : fNumTotal - 1;

    final int current = (state[0] < 0) ? state[1] : state[0];

    Occurence counter = null;
    if (fCountingStates != null) {
        counter = fCountingStates[current];
    }
    final int seen = state[2];

    final Vector allowed = new Vector();
    for (int term = 0; term < termCount; term++) {
        final int target = fTransTable[current][term];
        if (target == -1) {
            // No transition on this term.
            continue;
        }
        if (counter != null) {
            if (target == current) {
                // Looping back: not possible when we have already
                // looped the maximum number of times or more.
                if (seen >= counter.maxOccurs
                    && counter.maxOccurs != SchemaSymbols.OCCURRENCE_UNBOUNDED) {
                    continue;
                }
            }
            else if (seen < counter.minOccurs) {
                // Moving on: not possible before we have looped
                // enough times.
                continue;
            }
        }
        allowed.addElement(term < fNumElements
            ? (Object)fElements[term]
            : (Object)fWildcards[term]);
    }
    return allowed;
}
/**
 * Returns occurrence information for the current state, if it is a
 * counting state. When <code>state[0]</code> is negative,
 * <code>state[1]</code> is used as the current state.
 *
 * @param state the current state
 * @return a four element array holding, in this order, minOccurs,
 *         maxOccurs, <code>state[2]</code> and the element index of
 *         the counting state; or null if this content model has no
 *         counting states or the current state is not one of them
 */
public int [] occurenceInfo(int[] state) {
    if (fCountingStates == null) {
        return null;
    }
    final int current = (state[0] < 0) ? state[1] : state[0];
    final Occurence counter = fCountingStates[current];
    if (counter == null) {
        return null;
    }
    return new int[] {
        counter.minOccurs,
        counter.maxOccurs,
        state[2],
        counter.elemIndex
    };
}
/**
 * Returns the string form of the term with the given id. Ids below
 * <code>fNumElements</code> are looked up in <code>fElements</code>,
 * all others in <code>fWildcards</code>.
 *
 * @param termId the index of the term
 * @return <code>toString()</code> of the term, or null if there is no
 *         term stored at that index
 */
public String getTermName(int termId) {
    final Object term;
    if (termId < fNumElements) {
        term = fElements[termId];
    }
    else {
        term = fWildcards[termId];
    }
    if (term == null) {
        return null;
    }
    return term.toString();
}
/**
 * Returns the value of the <code>fIsCompactedForUPA</code> flag of
 * this content model.
 *
 * @return the current value of <code>fIsCompactedForUPA</code>
 */
public boolean isCompactedForUPA() {
    return this.fIsCompactedForUPA;
}
/**
 * Finds the next element declaration, after position
 * <code>index[0]</code>, for which a transition out of state
 * <code>s</code> is allowed.
 *
 * @param s     the state to leave
 * @param sn    passed on to <code>isAllowedTransition</code>, which
 *              stores the resulting state in it; may be null
 * @param index one element array used as a cursor: on entry the
 *              position of the previous hit, on exit the position of
 *              the new hit, or -1 if there is none
 * @return the element declaration found, or null if there is none
 */
public XSElementDecl nextElementTransition(int[] s, int[] sn, int[] index) {
    int candidate = index[0] + 1;
    while (candidate < fNumElements) {
        if (isAllowedTransition(s, sn, candidate)) {
            index[0] = candidate;
            return fElements[candidate];
        }
        candidate++;
    }
    // Ran out of elements.
    index[0] = -1;
    return null;
}
/**
 * Finds the next wildcard, after position <code>index[0]</code>, for
 * which a transition out of state <code>s</code> is allowed. A cursor
 * value of -1 starts the search at the first wildcard, that is, at
 * position <code>fNumElements</code>.
 *
 * @param s     the state to leave
 * @param sn    passed on to <code>isAllowedTransition</code>, which
 *              stores the resulting state in it; may be null
 * @param index one element array used as a cursor: on entry the
 *              position of the previous hit or -1, on exit the
 *              position of the new hit, or -1 if there is none
 * @return the wildcard found, or null if there is none
 */
public XSWildcardDecl nextWildcardTransition(int[] s, int[] sn, int[] index) {
    int candidate;
    if (index[0] == -1) {
        candidate = fNumElements;
    }
    else {
        candidate = index[0] + 1;
    }
    while (candidate < fNumTotal) {
        if (isAllowedTransition(s, sn, candidate)) {
            index[0] = candidate;
            return fWildcards[candidate];
        }
        candidate++;
    }
    // Ran out of wildcards.
    index[0] = -1;
    return null;
}
/**
 * Checks whether the transition on the term at position
 * <code>index</code> is allowed from state <code>s</code>, taking
 * counting states into account, and records the resulting state in
 * <code>sn</code> when one is supplied.
 *
 * Layout used here: slot 0 of <code>s</code> / <code>sn</code> is the
 * state number and slot 2 is the occurrence counter of a counting
 * state.
 *
 * Note that <code>sn[0]</code> is written as soon as a transition table
 * entry exists, i.e. before the counting checks, so it is set even when
 * this method goes on to return false. <code>sn[2]</code> is only
 * written on some paths (looping in a counting state, or entering one).
 *
 * NOTE(review): <code>s[0]</code> and <code>s[2]</code> are re-read
 * after <code>sn[0]</code> / <code>sn[2]</code> have been written. If a
 * caller ever passes the same array for both, the statement order below
 * matters - confirm against the callers before reordering anything.
 *
 * @param s     the state to leave
 * @param sn    receives the next state; may be null, in which case
 *              nothing is recorded
 * @param index position of the term in the transition table entry
 * @return true if the transition is allowed, false otherwise
 */
private boolean isAllowedTransition(int[] s, int[] sn, int index) {
int n = fTransTable[s[0]][index];
if (n == -1) {
// Transition not allowed
return false;
}
// Record the next state
if (sn != null) {
sn[0] = n;
}
// If there are no counting states, the transition is allowed.
if (fCountingStates == null) {
return true;
}
// If the transition is for an interleave open content (the last
// term, at position fNumTotal-1), then it's always allowed, even
// while counting. The counter in sn is left untouched in this case.
if (index == fNumTotal-1 && fOpenContent != null &&
fOpenContent.fMode == XSOpenContentDecl.MODE_INTERLEAVE) {
return true;
}
// Handling of counting states
Occurence o = fCountingStates[s[0]];
if (o != null) {
// If the previous state is a counting state
if (s[0] == n) {
// And the transition loops back to the same state
if (s[2] == o.maxOccurs) {
// Too many when trying to stay in this loop
return false;
}
// Allowed transition. Increase the count. Don't need to do so
// when we've satisfied the minOccurs requirement and maxOccurs
// is unbounded: the count is only incremented while it is 0,
// below minOccurs, or when maxOccurs is bounded.
if (sn != null) {
sn[2] = s[2];
if (sn[2] == 0 || sn[2] < o.minOccurs || o.maxOccurs != SchemaSymbols.OCCURRENCE_UNBOUNDED) {
sn[2]++;
}
}
}
else if (s[2] < o.minOccurs) {
// Exiting a counting state, but minOccurs is not satisfied.
return false;
}
else {
// Exiting a counting state. If we're entering a new
// counting state, reset the counter: 1 if this term is the one
// counted by the new state (o.elemIndex), 0 otherwise.
o = fCountingStates[n];
if (o != null && sn != null) {
sn[2] = (index == o.elemIndex) ? 1 : 0;
}
}
}
else {
// Previous state was not counting. Check the new state.
o = fCountingStates[n];
if (o != null && sn != null) {
// Entering a new counting state. Reset the counter: 1 if this
// term is the one counted by the new state, 0 otherwise.
sn[2] = (index == o.elemIndex) ? 1 : 0;
}
}
return true;
}
/**
 * Tells whether the given wildcard is the wildcard of this content
 * model's open content. The comparison is by reference.
 *
 * @param w the wildcard to test
 * @return true if there is an open content and its wildcard is the
 *         very same object as <code>w</code>; false otherwise
 */
public boolean isOpenContent(XSWildcardDecl w) {
    if (fOpenContent == null) {
        return false;
    }
    return fOpenContent.fWildcard == w;
}
/**
 * Collects the names of all elements known to this content model and,
 * for global elements, of the members of their substitution groups.
 *
 * @param subGroupHandler the substitution group handler used to look
 *                        up the substitution group of global elements
 * @return a flat list in which every name takes two consecutive
 *         entries: the target namespace followed by the local name
 */
public List getDefinedNames(SubstitutionGroupHandler subGroupHandler) {
    final List names = new ArrayList();
    int declIndex = 0;
    while (declIndex < fNumElements) {
        final XSElementDecl decl = fElements[declIndex++];
        names.add(decl.fTargetNamespace);
        names.add(decl.fName);

        // Only global elements are looked up for a substitution group.
        if (decl.fScope != XSElementDecl.SCOPE_GLOBAL) {
            continue;
        }
        final XSElementDecl[] members =
            subGroupHandler.getSubstitutionGroup(decl, fSchemaVersion);
        for (int m = 0; m < members.length; m++) {
            final XSElementDecl member = members[m];
            names.add(member.fTargetNamespace);
            names.add(member.fName);
        }
    }
    return names;
}
/**
 * Tries to advance the occurrence counters of a derived/base state pair
 * in one step. This content model plays the derived role: it works on
 * its own counting state <code>d[0]</code> and its counter
 * <code>d[2]</code>, and delegates to a strategy that depends on the
 * kind of the base content model.
 *
 * Nothing happens unless <code>d[0]</code> is a counting state, at
 * least one occurrence has been counted, and the counter is either
 * below minOccurs or strictly between minOccurs and maxOccurs.
 *
 * @param base   the base content model
 * @param b      the state of the base content model; may be updated
 * @param d      the state of this content model; may be updated
 * @param indexb index into <code>b</code>, only used when the base is
 *               an <code>XS11AllCM</code>
 */
public void optimizeStates(XS11CMRestriction.XS11CM base, int[] b, int[] d, int indexb) {
    // Can only optimize if the current state is a counting state.
    if (fCountingStates == null) {
        return;
    }
    final Occurence counter = fCountingStates[d[0]];
    if (counter == null) {
        return;
    }

    // Not sure whether we need this. Better to optimize only when we've
    // seen at least one element, to be sure we are "locked in" this state.
    final int seen = d[2];
    if (seen <= 0) {
        return;
    }

    // Below minOccurs: try to bring the count up to minOccurs. Strictly
    // between minOccurs and maxOccurs: try to bring it up to maxOccurs.
    // Whether that is possible depends on the space left in base.
    final int need;
    if (seen < counter.minOccurs) {
        need = counter.minOccurs - seen;
    }
    else if (seen > counter.minOccurs && seen < counter.maxOccurs) {
        need = counter.maxOccurs - seen;
    }
    else {
        need = 0;
    }

    // Derived has no space.
    if (need == 0) {
        return;
    }

    // Different strategy for different base kind.
    if (base instanceof XSDFACM) {
        optimizeForDFABase((XSDFACM)base, b, d, need);
        return;
    }
    if (base instanceof XS11AllCM) {
        optimizeForAllBase((XS11AllCM)base, b, d, need, indexb);
    }
}
/**
 * Counter optimization for the case where the base content model is a
 * DFA as well. Adds up to <code>need</code> occurrences to both the
 * derived counter <code>d[2]</code> and the base counter
 * <code>b[2]</code>, limited by the space left in the base's counting
 * state.
 *
 * The base's occurrence range is always taken from the base's own
 * current state, <code>b[0]</code>. (Indexing the base's counting
 * states with the derived state <code>d[0]</code> would read an
 * unrelated state of the base, or run past the end of its table when
 * the derived model has more states than the base.)
 *
 * @param base the base content model
 * @param b    the state of the base; <code>b[0]</code> is its state
 *             number and <code>b[2]</code> its occurrence counter,
 *             which may be updated
 * @param d    the state of the derived content model;
 *             <code>d[2]</code> is its occurrence counter, which may
 *             be updated
 * @param need the number of occurrences the derived side would like
 *             to add
 */
private void optimizeForDFABase(XSDFACM base, int[] b, int[] d, int need) {
    // Base must also be on a counting state
    if (base.fCountingStates == null) {
        return;
    }
    final Occurence baseCounter = base.fCountingStates[b[0]];
    if (baseCounter == null) {
        return;
    }
    // And have seen at least one element
    if (b[2] <= 0) {
        return;
    }
    // If base has max=unbounded, then there's enough space for "need"
    if (baseCounter.maxOccurs == SchemaSymbols.OCCURRENCE_UNBOUNDED) {
        d[2] += need;
        if (b[2] + need > baseCounter.minOccurs) {
            // Avoid setting count bigger than min if max is unbounded.
            // It makes no difference. Not exceeding min means fewer
            // distinct states, so shorter state-pairs in the list.
            b[2] = baseCounter.minOccurs;
        }
        else {
            b[2] += need;
        }
    }
    else {
        // If base doesn't have sufficient space, lower "need"
        final int space = baseCounter.maxOccurs - b[2];
        if (need > space) {
            need = space;
        }
        b[2] += need;
        d[2] += need;
    }
}
/**
 * Counter optimization for the case where the base content model is an
 * <code>XS11AllCM</code>. Adds up to <code>need</code> occurrences to
 * both the derived counter <code>d[2]</code> and the base counter
 * <code>b[indexb]</code>, limited by the space left in the base.
 *
 * @param base   the base content model
 * @param b      the state of the base; <code>b[indexb]</code> is the
 *               counter that may be updated
 * @param d      the state of the derived content model;
 *               <code>d[2]</code> is its occurrence counter, which may
 *               be updated
 * @param need   the number of occurrences the derived side would like
 *               to add
 * @param indexb the index passed to <code>base.minOccurs</code> /
 *               <code>base.maxOccurs</code> and used to address the
 *               counter in <code>b</code>
 */
private void optimizeForAllBase(XS11AllCM base, int[] b, int[] d, int need, int indexb) {
    // Need to have seen an element, to be safe.
    if (b[indexb] <= 0) {
        return;
    }

    final int baseMax = base.maxOccurs(indexb);

    // Similar to the DFA case.
    if (baseMax != SchemaSymbols.OCCURRENCE_UNBOUNDED) {
        // Bounded: never add more than base has room for.
        final int granted = Math.min(need, baseMax - b[indexb]);
        b[indexb] += granted;
        d[2] += granted;
        return;
    }

    // Unbounded: base always has room, but there is no point in
    // counting beyond its minOccurs.
    d[2] += need;
    final int baseMin = base.minOccurs(indexb);
    if (b[indexb] + need > baseMin) {
        b[indexb] = baseMin;
    }
    else {
        b[indexb] += need;
    }
}
} // class DFAContentModel