com.sun.org.apache.xerces.internal.impl.dtd.models.DFAContentModel Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of jaxp-ri Show documentation
Java API for XML Processing Reference Implementation
The newest version!
/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 *
 * Copyright (c) 1997-2010 Oracle and/or its affiliates. All rights reserved.
 *
 * The contents of this file are subject to the terms of either the GNU
 * General Public License Version 2 only ("GPL") or the Common Development
 * and Distribution License("CDDL") (collectively, the "License").  You
 * may not use this file except in compliance with the License.  You can
 * obtain a copy of the License at
 * https://glassfish.dev.java.net/public/CDDL+GPL_1_1.html
 * or packager/legal/LICENSE.txt.  See the License for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing the software, include this License Header Notice in each
 * file and include the License file at packager/legal/LICENSE.txt.
 *
 * GPL Classpath Exception:
 * Oracle designates this particular file as subject to the "Classpath"
 * exception as provided by Oracle in the GPL Version 2 section of the License
 * file that accompanied this code.
 *
 * Modifications:
 * If applicable, add the following below the License Header, with the fields
 * enclosed by brackets [] replaced by your own identifying information:
 * "Portions Copyright [year] [name of copyright owner]"
 *
 * Contributor(s):
 * If you wish your version of this file to be governed by only the CDDL or
 * only the GPL Version 2, indicate your decision by adding "[Contributor]
 * elects to include this software in this distribution under the [CDDL or GPL
 * Version 2] license."  If you don't indicate a single choice of license, a
 * recipient has the option to distribute your version of this file under
 * either the CDDL, the GPL Version 2 or to extend the choice of license to
 * its licensees as provided above.  However, if you add GPL Version 2 code
 * and therefore, elected the GPL Version 2 license, then the option applies
 * only if the new code is made subject to such option by the copyright
 * holder.
 *
 *
 * This file incorporates work covered by the following copyright and
 * permission notice:
 *
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.sun.org.apache.xerces.internal.impl.dtd.models;

import java.util.HashMap;

import com.sun.org.apache.xerces.internal.impl.dtd.XMLContentSpec;
import com.sun.org.apache.xerces.internal.xni.QName;

/**

 * @version $Id: DFAContentModel.java,v 1.5 2010-11-01 04:39:43 joehw Exp $
 * DFAContentModel is the derivative of ContentModel that does
 * all of the non-trivial element content validation. This class does 
 * the conversion from the regular expression to the DFA that 
 * it then uses in its validation algorithm.
 * 
 * Note: Upstream work insures that this class will never see
 * a content model with PCDATA in it. Any model with PCDATA is 'mixed' 
 * and is handled via the MixedContentModel class since mixed models 
 * are very constrained in form and easily handled via a special case. 
 * This also makes implementation of this class much easier.
 * 
 * @xerces.internal
 * 
 * @version $Id: DFAContentModel.java,v 1.5 2010-11-01 04:39:43 joehw Exp $
 */
public class DFAContentModel
    implements ContentModelValidator {

    //
    // Constants
    //
    // special strings

    /** Epsilon string. */
    private static String fEpsilonString = "<>";

    /** End-of-content string. */
    private static String fEOCString = "<>";

    /** initializing static members **/
    static {
        fEpsilonString = fEpsilonString.intern();
        fEOCString = fEOCString.intern();
    }

    // debugging

    /** Set to true to debug content model validation. */
    private static final boolean DEBUG_VALIDATE_CONTENT = false;

    //
    // Data
    //

    /* this is the EquivClassComparator object */
    //private EquivClassComparator comparator = null;

    /**
     * This is the map of unique input symbol elements to indices into
     * each state's per-input symbol transition table entry. This is part
     * of the built DFA information that must be kept around to do the
     * actual validation.
     */
    private QName fElemMap[] = null;

    /**
     * This is a map of whether the element map contains information
     * related to ANY models.
     */
    private int fElemMapType[] = null;

    /** The element map size. */
    private int fElemMapSize = 0;

    /** Boolean to distinguish Schema Mixed Content */
    private boolean fMixed;

    /**
     * The NFA position of the special EOC (end of content) node. This
     * is saved away since it's used during the DFA build.
     */
    private int fEOCPos = 0;


    /**
     * This is an array of booleans, one per state (there are
     * fTransTableSize states in the DFA) that indicates whether that
     * state is a final state.
     */
    private boolean fFinalStateFlags[] = null;

    /**
     * The list of follow positions for each NFA position (i.e. for each
     * non-epsilon leaf node.) This is only used during the building of
     * the DFA, and is let go afterwards.
     */
    private CMStateSet fFollowList[] = null;

    /**
     * This is the head node of our intermediate representation. It is
     * only non-null during the building of the DFA (just so that it
     * does not have to be passed all around.) Once the DFA is built,
     * this is no longer required so its nulled out.
     */
    private CMNode fHeadNode = null;

    /**
     * The count of leaf nodes. This is an important number that set some
     * limits on the sizes of data structures in the DFA process.
     */
    private int fLeafCount = 0;

    /**
     * An array of non-epsilon leaf nodes, which is used during the DFA
     * build operation, then dropped.
     */
    private CMLeaf fLeafList[] = null;

    /** Array mapping ANY types to the leaf list. */
    private int fLeafListType[] = null;

    //private ContentLeafNameTypeVector fLeafNameTypeVector = null;

    /**
     * The string pool of our parser session. This is set during construction
     * and kept around.
     */
    //private StringPool fStringPool = null;

    /**
     * This is the transition table that is the main by product of all
     * of the effort here. It is an array of arrays of ints. The first
     * dimension is the number of states we end up with in the DFA. The
     * second dimensions is the number of unique elements in the content
     * model (fElemMapSize). Each entry in the second dimension indicates
     * the new state given that input for the first dimension's start
     * state.
     * 

     * The fElemMap array handles mapping from element indexes to
     * positions in the second dimension of the transition table.
     */
    private int fTransTable[][] = null;

    /**
     * The number of valid entries in the transition table, and in the other
     * related tables such as fFinalStateFlags.
     */
    private int fTransTableSize = 0;

    /**
     * Flag that indicates that even though we have a "complicated"
     * content model, it is valid to have no content. In other words,
     * all parts of the content model are optional. For example:
     * 
     *      <!ELEMENT AllOptional (Optional*,NotRequired?)>
     * 
     */
    private boolean fEmptyContentIsValid = false;

    // temp variables

    /** Temporary qualified name. */
    private final QName fQName = new QName();

    //
    // Constructors
    //


    //
    // Constructors
    //

    /**
     * Constructs a DFA content model.
     *
     * @param syntaxTree    The syntax tree of the content model.
     * @param leafCount     The number of leaves.
     * @param mixed
     *
     */
    public DFAContentModel(CMNode syntaxTree, int leafCount, boolean mixed) {
        // Store away our index and pools in members
        //fStringPool = stringPool;
        fLeafCount = leafCount;


        // this is for Schema Mixed Content
        fMixed = mixed;

        //
        //  Ok, so lets grind through the building of the DFA. This method
        //  handles the high level logic of the algorithm, but it uses a
        //  number of helper classes to do its thing.
        //
        //  In order to avoid having hundreds of references to the error and
        //  string handlers around, this guy and all of his helper classes
        //  just throw a simple exception and we then pass it along.
        //
        buildDFA(syntaxTree);
    }

    //
    // ContentModelValidator methods
    //

    /**
     * Check that the specified content is valid according to this
     * content model. This method can also be called to do 'what if'
     * testing of content models just to see if they would be valid.
     * 
     * A value of -1 in the children array indicates a PCDATA node. All other
     * indexes will be positive and represent child elements. The count can be
     * zero, since some elements have the EMPTY content model and that must be
     * confirmed.
     *
     * @param children The children of this element.  Each integer is an index within
     *                 the StringPool of the child element name.  An index
     *                 of -1 is used to indicate an occurrence of non-whitespace character
     *                 data.
     * @param offset Offset into the array where the children starts.
     * @param length The number of entries in the children array.
     *
     * @return The value -1 if fully valid, else the 0 based index of the child
     *         that first failed. If the value returned is equal to the number
     *         of children, then the specified children are valid but additional
     *         content is required to reach a valid ending state.
     *
     */
    public int validate(QName[] children, int offset, int length) {

        if (DEBUG_VALIDATE_CONTENT)
            System.out.println("DFAContentModel#validateContent");

        //
        // A DFA content model must *always* have at least 1 child
        // so a failure is given if no children present.
        //
        // Defect 782: This is an incorrect statement because a DFA
        // content model is also used for constructions such as:
        //
        //     (Optional*,NotRequired?)
        //
        // where a perfectly valid content would be NO CHILDREN.
        // Therefore, if there are no children, we must check to
        // see if the CMNODE_EOC marker is a valid start state! -Ac
        //
        if (length == 0) {
            if (DEBUG_VALIDATE_CONTENT) {
                System.out.println("!!! no children");
                System.out.println("elemMap="+fElemMap);
                for (int i = 0; i < fElemMap.length; i++) {
                    String uri = fElemMap[i].uri;
                    String localpart = fElemMap[i].localpart;

                    System.out.println("fElemMap["+i+"]="+uri+","+
                                       localpart+" ("+
                                       uri+", "+
                                       localpart+
                                       ')');

                }
                System.out.println("EOCIndex="+fEOCString);
            }

            return fEmptyContentIsValid ? -1 : 0;

        } // if child count == 0

        //
        //  Lets loop through the children in the array and move our way
        //  through the states. Note that we use the fElemMap array to map
        //  an element index to a state index.
        //
        int curState = 0;
        for (int childIndex = 0; childIndex < length; childIndex++)
        {
            // Get the current element index out
            final QName curElem = children[offset + childIndex];
            // ignore mixed text
            if (fMixed && curElem.localpart == null) {
                continue;
            }

            // Look up this child in our element map
            int elemIndex = 0;
            for (; elemIndex < fElemMapSize; elemIndex++)
            {
                int type = fElemMapType[elemIndex] & 0x0f ;
                if (type == XMLContentSpec.CONTENTSPECNODE_LEAF) {
                    //System.out.println("fElemMap["+elemIndex+"]: "+fElemMap[elemIndex]);
                    if (fElemMap[elemIndex].rawname == curElem.rawname) {
                        break;
                    }
                }
                else if (type == XMLContentSpec.CONTENTSPECNODE_ANY) {
                    String uri = fElemMap[elemIndex].uri;
                    if (uri == null || uri == curElem.uri) {
                        break;
                    }
                }
                else if (type == XMLContentSpec.CONTENTSPECNODE_ANY_LOCAL) {
                    if (curElem.uri == null) {
                        break;
                    }
                }
                else if (type == XMLContentSpec.CONTENTSPECNODE_ANY_OTHER) {
                    if (fElemMap[elemIndex].uri != curElem.uri) {
                        break;
                    }
                }
            }

            // If we didn't find it, then obviously not valid
            if (elemIndex == fElemMapSize) {
                if (DEBUG_VALIDATE_CONTENT) {
                    System.out.println("!!! didn't find it");

                    System.out.println("curElem : " +curElem );
                    for (int i=0; i-->