// org.apache.xerces.impl.dtd.models.DFAContentModel (Maven / Gradle / Ivy)
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.xerces.impl.dtd.models;
import java.util.HashMap;
import org.apache.xerces.impl.dtd.XMLContentSpec;
import org.apache.xerces.xni.QName;
/**
* DFAContentModel is the derivative of ContentModel that does
* all of the non-trivial element content validation. This class does
* the conversion from the regular expression to the DFA that
* it then uses in its validation algorithm.
*
* Note: Upstream work ensures that this class will never see
* a content model with PCDATA in it. Any model with PCDATA is 'mixed'
* and is handled via the MixedContentModel class since mixed models
* are very constrained in form and easily handled via a special case.
* This also makes implementation of this class much easier.
*
* @xerces.internal
*
* @version $Id: DFAContentModel.java 572057 2007-09-02 18:03:20Z mrglavas $
*/
public class DFAContentModel
implements ContentModelValidator {
//
// Constants
//
// special strings
/** Epsilon string. */
private static String fEpsilonString = "<>";
/** End-of-content string. */
private static String fEOCString = "<>";
/** initializing static members **/
static {
fEpsilonString = fEpsilonString.intern();
fEOCString = fEOCString.intern();
}
// debugging
/** Set to true to debug content model validation. */
private static final boolean DEBUG_VALIDATE_CONTENT = false;
//
// Data
//
/* this is the EquivClassComparator object */
//private EquivClassComparator comparator = null;
/**
* This is the map of unique input symbol elements to indices into
* each state's per-input symbol transition table entry. This is part
* of the built DFA information that must be kept around to do the
* actual validation.
*/
private QName fElemMap[] = null;
/**
* This is a map of whether the element map contains information
* related to ANY models.
*/
private int fElemMapType[] = null;
/** The element map size. */
private int fElemMapSize = 0;
/** Boolean to distinguish Schema Mixed Content */
private boolean fMixed;
/**
* The NFA position of the special EOC (end of content) node. This
* is saved away since it's used during the DFA build.
*/
private int fEOCPos = 0;
/**
* This is an array of booleans, one per state (there are
* fTransTableSize states in the DFA) that indicates whether that
* state is a final state.
*/
private boolean fFinalStateFlags[] = null;
/**
* The list of follow positions for each NFA position (i.e. for each
* non-epsilon leaf node.) This is only used during the building of
* the DFA, and is let go afterwards.
*/
private CMStateSet fFollowList[] = null;
/**
* This is the head node of our intermediate representation. It is
* only non-null during the building of the DFA (just so that it
* does not have to be passed all around.) Once the DFA is built,
* this is no longer required so its nulled out.
*/
private CMNode fHeadNode = null;
/**
* The count of leaf nodes. This is an important number that set some
* limits on the sizes of data structures in the DFA process.
*/
private int fLeafCount = 0;
/**
* An array of non-epsilon leaf nodes, which is used during the DFA
* build operation, then dropped.
*/
private CMLeaf fLeafList[] = null;
/** Array mapping ANY types to the leaf list. */
private int fLeafListType[] = null;
//private ContentLeafNameTypeVector fLeafNameTypeVector = null;
/**
* The string pool of our parser session. This is set during construction
* and kept around.
*/
//private StringPool fStringPool = null;
/**
* This is the transition table that is the main by product of all
* of the effort here. It is an array of arrays of ints. The first
* dimension is the number of states we end up with in the DFA. The
* second dimensions is the number of unique elements in the content
* model (fElemMapSize). Each entry in the second dimension indicates
* the new state given that input for the first dimension's start
* state.
*
* The fElemMap array handles mapping from element indexes to
* positions in the second dimension of the transition table.
*/
private int fTransTable[][] = null;
/**
* The number of valid entries in the transition table, and in the other
* related tables such as fFinalStateFlags.
*/
private int fTransTableSize = 0;
/**
* Flag that indicates that even though we have a "complicated"
* content model, it is valid to have no content. In other words,
* all parts of the content model are optional. For example:
*
* <!ELEMENT AllOptional (Optional*,NotRequired?)>
*
*/
private boolean fEmptyContentIsValid = false;
// temp variables
/** Temporary qualified name. */
private final QName fQName = new QName();
//
// Constructors
//
/**
 * Creates a DFA content model by recording the given settings and then
 * building the DFA from the supplied syntax tree.
 *
 * @param syntaxTree The syntax tree of the content model; passed to
 *                   buildDFA.
 * @param leafCount  The number of leaves; stored in fLeafCount.
 * @param mixed      Stored in fMixed, the flag that distinguishes Schema
 *                   mixed content.
 */
public DFAContentModel(CMNode syntaxTree, int leafCount, boolean mixed) {
    // Flag used for Schema Mixed Content.
    this.fMixed = mixed;

    // Keep the leaf count in a member for the build.
    this.fLeafCount = leafCount;

    //
    // Now grind through the building of the DFA. buildDFA handles the
    // high level logic of the algorithm, relying on a number of helper
    // classes to do the work.
    //
    // To avoid keeping hundreds of references to the error and string
    // handlers around, it and all of its helper classes just throw a
    // simple exception, which is then passed along.
    //
    buildDFA(syntaxTree);
}
//
// ContentModelValidator methods
//
/**
* Check that the specified content is valid according to this
* content model. This method can also be called to do 'what if'
* testing of content models just to see if they would be valid.
*
* A value of -1 in the children array indicates a PCDATA node. All other
* indexes will be positive and represent child elements. The count can be
* zero, since some elements have the EMPTY content model and that must be
* confirmed.
*
* @param children The children of this element. Each integer is an index within
* the <code>StringPool</code> of the child element name. An index
* of -1 is used to indicate an occurrence of non-whitespace character
* data.
* @param offset Offset into the array where the children start.
* @param length The number of entries in the <code>children</code> array.
*
* @return The value -1 if fully valid, else the 0 based index of the child
* that first failed. If the value returned is equal to the number
* of children, then the specified children are valid but additional
* content is required to reach a valid ending state.
*
*/
public int validate(QName[] children, int offset, int length) {
if (DEBUG_VALIDATE_CONTENT)
System.out.println("DFAContentModel#validateContent");
//
// A DFA content model must *always* have at least 1 child
// so a failure is given if no children present.
//
// Defect 782: This is an incorrect statement because a DFA
// content model is also used for constructions such as:
//
// (Optional*,NotRequired?)
//
// where a perfectly valid content would be NO CHILDREN.
// Therefore, if there are no children, we must check to
// see if the CMNODE_EOC marker is a valid start state! -Ac
//
if (length == 0) {
if (DEBUG_VALIDATE_CONTENT) {
System.out.println("!!! no children");
System.out.println("elemMap="+fElemMap);
for (int i = 0; i < fElemMap.length; i++) {
String uri = fElemMap[i].uri;
String localpart = fElemMap[i].localpart;
System.out.println("fElemMap["+i+"]="+uri+","+
localpart+" ("+
uri+", "+
localpart+
')');
}
System.out.println("EOCIndex="+fEOCString);
}
return fEmptyContentIsValid ? -1 : 0;
} // if child count == 0
//
// Lets loop through the children in the array and move our way
// through the states. Note that we use the fElemMap array to map
// an element index to a state index.
//
int curState = 0;
for (int childIndex = 0; childIndex < length; childIndex++)
{
// Get the current element index out
final QName curElem = children[offset + childIndex];
// ignore mixed text
if (fMixed && curElem.localpart == null) {
continue;
}
// Look up this child in our element map
int elemIndex = 0;
for (; elemIndex < fElemMapSize; elemIndex++)
{
int type = fElemMapType[elemIndex] & 0x0f ;
if (type == XMLContentSpec.CONTENTSPECNODE_LEAF) {
//System.out.println("fElemMap["+elemIndex+"]: "+fElemMap[elemIndex]);
if (fElemMap[elemIndex].rawname == curElem.rawname) {
break;
}
}
else if (type == XMLContentSpec.CONTENTSPECNODE_ANY) {
String uri = fElemMap[elemIndex].uri;
if (uri == null || uri == curElem.uri) {
break;
}
}
else if (type == XMLContentSpec.CONTENTSPECNODE_ANY_LOCAL) {
if (curElem.uri == null) {
break;
}
}
else if (type == XMLContentSpec.CONTENTSPECNODE_ANY_OTHER) {
if (fElemMap[elemIndex].uri != curElem.uri) {
break;
}
}
}
// If we didn't find it, then obviously not valid
if (elemIndex == fElemMapSize) {
if (DEBUG_VALIDATE_CONTENT) {
System.out.println("!!! didn't find it");
System.out.println("curElem : " +curElem );
for (int i=0; i