All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.sun.org.apache.xerces.internal.impl.xs.models.XSDFACM Maven / Gradle / Ivy

The newest version!
/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 *
 * Copyright (c) 1997-2010 Oracle and/or its affiliates. All rights reserved.
 *
 * The contents of this file are subject to the terms of either the GNU
 * General Public License Version 2 only ("GPL") or the Common Development
 * and Distribution License("CDDL") (collectively, the "License").  You
 * may not use this file except in compliance with the License.  You can
 * obtain a copy of the License at
 * https://glassfish.dev.java.net/public/CDDL+GPL_1_1.html
 * or packager/legal/LICENSE.txt.  See the License for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing the software, include this License Header Notice in each
 * file and include the License file at packager/legal/LICENSE.txt.
 *
 * GPL Classpath Exception:
 * Oracle designates this particular file as subject to the "Classpath"
 * exception as provided by Oracle in the GPL Version 2 section of the License
 * file that accompanied this code.
 *
 * Modifications:
 * If applicable, add the following below the License Header, with the fields
 * enclosed by brackets [] replaced by your own identifying information:
 * "Portions Copyright [year] [name of copyright owner]"
 *
 * Contributor(s):
 * If you wish your version of this file to be governed by only the CDDL or
 * only the GPL Version 2, indicate your decision by adding "[Contributor]
 * elects to include this software in this distribution under the [CDDL or GPL
 * Version 2] license."  If you don't indicate a single choice of license, a
 * recipient has the option to distribute your version of this file under
 * either the CDDL, the GPL Version 2 or to extend the choice of license to
 * its licensees as provided above.  However, if you add GPL Version 2 code
 * and therefore, elected the GPL Version 2 license, then the option applies
 * only if the new code is made subject to such option by the copyright
 * holder.
 *
 *
 * This file incorporates work covered by the following copyright and
 * permission notice:
 *
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.sun.org.apache.xerces.internal.impl.xs.models;

import com.sun.org.apache.xerces.internal.xni.QName;
import com.sun.org.apache.xerces.internal.impl.dtd.models.CMNode;
import com.sun.org.apache.xerces.internal.impl.dtd.models.CMStateSet;
import com.sun.org.apache.xerces.internal.impl.xs.SchemaSymbols;
import com.sun.org.apache.xerces.internal.impl.xs.SubstitutionGroupHandler;
import com.sun.org.apache.xerces.internal.impl.xs.XSElementDecl;
import com.sun.org.apache.xerces.internal.impl.xs.XSParticleDecl;
import com.sun.org.apache.xerces.internal.impl.xs.XSModelGroupImpl;
import com.sun.org.apache.xerces.internal.impl.xs.XSWildcardDecl;
import com.sun.org.apache.xerces.internal.impl.xs.XMLSchemaException;
import com.sun.org.apache.xerces.internal.impl.xs.XSConstraints;

import java.util.Vector;
import java.util.ArrayList;
import java.util.HashMap;

/**
 * DFAContentModel is the implementation of XSCMValidator that does
 * all of the non-trivial element content validation. This class does
 * the conversion from the regular expression to the DFA that
 * it then uses in its validation algorithm.
 *
 * @xerces.internal 
 *
 * @author Neil Graham, IBM
 * @version $Id: XSDFACM.java,v 1.10 2010-11-01 04:39:58 joehw Exp $
 */
public class XSDFACM
    implements XSCMValidator {

    //
    // Constants
    //
    private static final boolean DEBUG = false;

    // special strings

    // debugging

    /** Set to true to debug content model validation. */
    private static final boolean DEBUG_VALIDATE_CONTENT = false;

    //
    // Data
    //

    /**
     * This is the map of unique input symbol elements to indices into
     * each state's per-input symbol transition table entry. This is part
     * of the built DFA information that must be kept around to do the
     * actual validation.  Note tat since either XSElementDecl or XSParticleDecl object
     * can live here, we've got to use an Object.
     */
    private Object fElemMap[] = null;

    /**
     * This is a map of whether the element map contains information
     * related to ANY models.
     */
    private int fElemMapType[] = null;

    /**
     * id of the unique input symbol
     */
    private int fElemMapId[] = null;
    
    /** The element map size. */
    private int fElemMapSize = 0;

    /**
     * This is an array of booleans, one per state (there are
     * fTransTableSize states in the DFA) that indicates whether that
     * state is a final state.
     */
    private boolean fFinalStateFlags[] = null;

    /**
     * The list of follow positions for each NFA position (i.e. for each
     * non-epsilon leaf node.) This is only used during the building of
     * the DFA, and is let go afterwards.
     */
    private CMStateSet fFollowList[] = null;

    /**
     * This is the head node of our intermediate representation. It is
     * only non-null during the building of the DFA (just so that it
     * does not have to be passed all around.) Once the DFA is built,
     * this is no longer required so its nulled out.
     */
    private CMNode fHeadNode = null;

    /**
     * The count of leaf nodes. This is an important number that set some
     * limits on the sizes of data structures in the DFA process.
     */
    private int fLeafCount = 0;

    /**
     * An array of non-epsilon leaf nodes, which is used during the DFA
     * build operation, then dropped.
     */
    private XSCMLeaf fLeafList[] = null;

    /** Array mapping ANY types to the leaf list. */
    private int fLeafListType[] = null;

    /**
     * This is the transition table that is the main by product of all
     * of the effort here. It is an array of arrays of ints. The first
     * dimension is the number of states we end up with in the DFA. The
     * second dimensions is the number of unique elements in the content
     * model (fElemMapSize). Each entry in the second dimension indicates
     * the new state given that input for the first dimension's start
     * state.
     * 

* The fElemMap array handles mapping from element indexes to * positions in the second dimension of the transition table. */ private int fTransTable[][] = null; /** * Array containing occurence information for looping states * which use counters to check minOccurs/maxOccurs. */ private Occurence [] fCountingStates = null; static final class Occurence { final int minOccurs; final int maxOccurs; final int elemIndex; public Occurence (XSCMRepeatingLeaf leaf, int elemIndex) { minOccurs = leaf.getMinOccurs(); maxOccurs = leaf.getMaxOccurs(); this.elemIndex = elemIndex; } public String toString() { return "minOccurs=" + minOccurs + ";maxOccurs=" + ((maxOccurs != SchemaSymbols.OCCURRENCE_UNBOUNDED) ? Integer.toString(maxOccurs) : "unbounded"); } } /** * The number of valid entries in the transition table, and in the other * related tables such as fFinalStateFlags. */ private int fTransTableSize = 0; /** * Array of counters for all the for elements (or wildcards) * of the form a{n,m} where n > 1 and m <= unbounded. Used * to count the a's to later check against n and m. Counter * set to -1 if element (or wildcard) not optimized by * constant space algorithm. */ private int fElemMapCounter[]; /** * Array of lower bounds for all the for elements (or wildcards) * of the form a{n,m} where n > 1 and m <= unbounded. This array * stores the n's for those elements (or wildcards) for which * the constant space algorithm applies (or -1 otherwise). */ private int fElemMapCounterLowerBound[]; /** * Array of upper bounds for all the for elements (or wildcards) * of the form a{n,m} where n > 1 and m <= unbounded. This array * stores the n's for those elements (or wildcards) for which * the constant space algorithm applies, or -1 if algorithm does * not apply or m = unbounded. */ private int fElemMapCounterUpperBound[]; // -1 if no upper bound // temp variables // // Constructors // /** * Constructs a DFA content model. * * @param syntaxTree The syntax tree of the content model. * @param leafCount The number of leaves. * * @exception RuntimeException Thrown if DFA can't be built. */ public XSDFACM(CMNode syntaxTree, int leafCount) { // Store away our index and pools in members fLeafCount = leafCount; // // Create some string pool indexes that represent the names of some // magical nodes in the syntax tree. // (already done in static initialization... // // // Ok, so lets grind through the building of the DFA. This method // handles the high level logic of the algorithm, but it uses a // number of helper classes to do its thing. // // In order to avoid having hundreds of references to the error and // string handlers around, this guy and all of his helper classes // just throw a simple exception and we then pass it along. // if(DEBUG_VALIDATE_CONTENT) { XSDFACM.time -= System.currentTimeMillis(); } buildDFA(syntaxTree); if(DEBUG_VALIDATE_CONTENT) { XSDFACM.time += System.currentTimeMillis(); System.out.println("DFA build: " + XSDFACM.time + "ms"); } } private static long time = 0; // // XSCMValidator methods // /** * check whether the given state is one of the final states * * @param state the state to check * * @return whether it's a final state */ public boolean isFinalState (int state) { return (state < 0)? false : fFinalStateFlags[state]; } /** * one transition only * * @param curElem The current element's QName * @param state stack to store the previous state * @param subGroupHandler the substitution group handler * * @return null if transition is invalid; otherwise the Object corresponding to the * XSElementDecl or XSWildcardDecl identified. Also, the * state array will be modified to include the new state; this so that the validator can * store it away. * * @exception RuntimeException thrown on error */ public Object oneTransition(QName curElem, int[] state, SubstitutionGroupHandler subGroupHandler) { int curState = state[0]; if(curState == XSCMValidator.FIRST_ERROR || curState == XSCMValidator.SUBSEQUENT_ERROR) { // there was an error last time; so just go find correct Object in fElemmMap. // ... after resetting state[0]. if(curState == XSCMValidator.FIRST_ERROR) state[0] = XSCMValidator.SUBSEQUENT_ERROR; return findMatchingDecl(curElem, subGroupHandler); } int nextState = 0; int elemIndex = 0; Object matchingDecl = null; for (; elemIndex < fElemMapSize; elemIndex++) { nextState = fTransTable[curState][elemIndex]; if (nextState == -1) continue; int type = fElemMapType[elemIndex] ; if (type == XSParticleDecl.PARTICLE_ELEMENT) { matchingDecl = subGroupHandler.getMatchingElemDecl(curElem, (XSElementDecl)fElemMap[elemIndex]); if (matchingDecl != null) { // Increment counter if constant space algorithm applies if (fElemMapCounter[elemIndex] >= 0) { fElemMapCounter[elemIndex]++; } break; } } else if (type == XSParticleDecl.PARTICLE_WILDCARD) { if (((XSWildcardDecl)fElemMap[elemIndex]).allowNamespace(curElem.uri)) { matchingDecl = fElemMap[elemIndex]; // Increment counter if constant space algorithm applies if (fElemMapCounter[elemIndex] >= 0) { fElemMapCounter[elemIndex]++; } break; } } } // if we still can't find a match, set the state to first_error // and return null if (elemIndex == fElemMapSize) { state[1] = state[0]; state[0] = XSCMValidator.FIRST_ERROR; return findMatchingDecl(curElem, subGroupHandler); } if (fCountingStates != null) { Occurence o = fCountingStates[curState]; if (o != null) { if (curState == nextState) { if (++state[2] > o.maxOccurs && o.maxOccurs != SchemaSymbols.OCCURRENCE_UNBOUNDED) { // It's likely that we looped too many times on the current state // however it's possible that we actually matched another particle // which allows the same name. // // Consider: // // // // // // // and // // // // // // // In the DFA there will be two transitions from the current state which // allow "foo". Note that this is not a UPA violation. The ambiguity of which // transition to take is resolved by the current value of the counter. Since // we've already seen enough instances of the first "foo" perhaps there is // another element declaration or wildcard deeper in the element map which // matches. return findMatchingDecl(curElem, state, subGroupHandler, elemIndex); } } else if (state[2] < o.minOccurs) { // not enough loops on the current state. state[1] = state[0]; state[0] = XSCMValidator.FIRST_ERROR; return findMatchingDecl(curElem, subGroupHandler); } else { // Exiting a counting state. If we're entering a new // counting state, reset the counter. o = fCountingStates[nextState]; if (o != null) { state[2] = (elemIndex == o.elemIndex) ? 1 : 0; } } } else { o = fCountingStates[nextState]; if (o != null) { // Entering a new counting state. Reset the counter. // If we've already seen one instance of the looping // particle set the counter to 1, otherwise set it // to 0. state[2] = (elemIndex == o.elemIndex) ? 1 : 0; } } } state[0] = nextState; return matchingDecl; } // oneTransition(QName, int[], SubstitutionGroupHandler): Object Object findMatchingDecl(QName curElem, SubstitutionGroupHandler subGroupHandler) { Object matchingDecl = null; for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) { int type = fElemMapType[elemIndex] ; if (type == XSParticleDecl.PARTICLE_ELEMENT) { matchingDecl = subGroupHandler.getMatchingElemDecl(curElem, (XSElementDecl)fElemMap[elemIndex]); if (matchingDecl != null) { return matchingDecl; } } else if (type == XSParticleDecl.PARTICLE_WILDCARD) { if(((XSWildcardDecl)fElemMap[elemIndex]).allowNamespace(curElem.uri)) return fElemMap[elemIndex]; } } return null; } // findMatchingDecl(QName, SubstitutionGroupHandler): Object Object findMatchingDecl(QName curElem, int[] state, SubstitutionGroupHandler subGroupHandler, int elemIndex) { int curState = state[0]; int nextState = 0; Object matchingDecl = null; while (++elemIndex < fElemMapSize) { nextState = fTransTable[curState][elemIndex]; if (nextState == -1) continue; int type = fElemMapType[elemIndex] ; if (type == XSParticleDecl.PARTICLE_ELEMENT) { matchingDecl = subGroupHandler.getMatchingElemDecl(curElem, (XSElementDecl)fElemMap[elemIndex]); if (matchingDecl != null) { break; } } else if (type == XSParticleDecl.PARTICLE_WILDCARD) { if (((XSWildcardDecl)fElemMap[elemIndex]).allowNamespace(curElem.uri)) { matchingDecl = fElemMap[elemIndex]; break; } } } // if we still can't find a match, set the state to FIRST_ERROR and return null if (elemIndex == fElemMapSize) { state[1] = state[0]; state[0] = XSCMValidator.FIRST_ERROR; return findMatchingDecl(curElem, subGroupHandler); } // if we found a match, set the next state and reset the // counter if the next state is a counting state. state[0] = nextState; final Occurence o = fCountingStates[nextState]; if (o != null) { state[2] = (elemIndex == o.elemIndex) ? 1 : 0; } return matchingDecl; } // findMatchingDecl(QName, int[], SubstitutionGroupHandler, int): Object // This method returns the start states of the content model. public int[] startContentModel() { // Clear all constant space algorithm counters in use for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) { if (fElemMapCounter[elemIndex] != -1) { fElemMapCounter[elemIndex] = 0; } } // [0] : the current state // [1] : if [0] is an error state then the // last valid state before the error // [2] : occurence counter for counting states return new int [3]; } // startContentModel():int[] // this method returns whether the last state was a valid final state public boolean endContentModel(int[] state) { final int curState = state[0]; if (fFinalStateFlags[curState]) { if (fCountingStates != null) { Occurence o = fCountingStates[curState]; if (o != null && state[2] < o.minOccurs) { // not enough loops on the current state to be considered final. return false; } } return true; } return false; } // endContentModel(int[]): boolean // Killed off whatCanGoHere; we may need it for DOM canInsert(...) etc., // but we can put it back later. // // Private methods // /** * Builds the internal DFA transition table from the given syntax tree. * * @param syntaxTree The syntax tree. * * @exception RuntimeException Thrown if DFA cannot be built. */ private void buildDFA(CMNode syntaxTree) { // // The first step we need to take is to rewrite the content model // using our CMNode objects, and in the process get rid of any // repetition short cuts, converting them into '*' style repetitions // or getting rid of repetitions altogether. // // The conversions done are: // // x+ -> (x|x*) // x? -> (x|epsilon) // // This is a relatively complex scenario. What is happening is that // we create a top level binary node of which the special EOC value // is set as the right side node. The the left side is set to the // rewritten syntax tree. The source is the original content model // info from the decl pool. The rewrite is done by buildSyntaxTree() // which recurses the decl pool's content of the element and builds // a new tree in the process. // // Note that, during this operation, we set each non-epsilon leaf // node's DFA state position and count the number of such leafs, which // is left in the fLeafCount member. // // The nodeTmp object is passed in just as a temp node to use during // the recursion. Otherwise, we'd have to create a new node on every // level of recursion, which would be piggy in Java (as is everything // for that matter.) // /* MODIFIED (Jan, 2001) * * Use following rules. * nullable(x+) := nullable(x), first(x+) := first(x), last(x+) := last(x) * nullable(x?) := true, first(x?) := first(x), last(x?) := last(x) * * The same computation of follow as x* is applied to x+ * * The modification drastically reduces computation time of * "(a, (b, a+, (c, (b, a+)+, a+, (d, (c, (b, a+)+, a+)+, (b, a+)+, a+)+)+)+)+" */ // // And handle specially the EOC node, which also must be numbered // and counted as a non-epsilon leaf node. It could not be handled // in the above tree build because it was created before all that // started. We save the EOC position since its used during the DFA // building loop. // int EOCPos = fLeafCount; XSCMLeaf nodeEOC = new XSCMLeaf(XSParticleDecl.PARTICLE_ELEMENT, null, -1, fLeafCount++); fHeadNode = new XSCMBinOp( XSModelGroupImpl.MODELGROUP_SEQUENCE, syntaxTree, nodeEOC ); // // Ok, so now we have to iterate the new tree and do a little more // work now that we know the leaf count. One thing we need to do is // to calculate the first and last position sets of each node. This // is cached away in each of the nodes. // // Along the way we also set the leaf count in each node as the // maximum state count. They must know this in order to create their // first/last pos sets. // // We also need to build an array of references to the non-epsilon // leaf nodes. Since we iterate it in the same way as before, this // will put them in the array according to their position values. // fLeafList = new XSCMLeaf[fLeafCount]; fLeafListType = new int[fLeafCount]; postTreeBuildInit(fHeadNode); // // And, moving onward... We now need to build the follow position // sets for all the nodes. So we allocate an array of state sets, // one for each leaf node (i.e. each DFA position.) // fFollowList = new CMStateSet[fLeafCount]; for (int index = 0; index < fLeafCount; index++) fFollowList[index] = new CMStateSet(fLeafCount); calcFollowList(fHeadNode); // // And finally the big push... Now we build the DFA using all the // states and the tree we've built up. First we set up the various // data structures we are going to use while we do this. // // First of all we need an array of unique element names in our // content model. For each transition table entry, we need a set of // contiguous indices to represent the transitions for a particular // input element. So we need to a zero based range of indexes that // map to element types. This element map provides that mapping. // fElemMap = new Object[fLeafCount]; fElemMapType = new int[fLeafCount]; fElemMapId = new int[fLeafCount]; fElemMapCounter = new int[fLeafCount]; fElemMapCounterLowerBound = new int[fLeafCount]; fElemMapCounterUpperBound = new int[fLeafCount]; fElemMapSize = 0; Occurence [] elemOccurenceMap = null; for (int outIndex = 0; outIndex < fLeafCount; outIndex++) { // optimization from Henry Zongaro: //fElemMap[outIndex] = new Object (); fElemMap[outIndex] = null; int inIndex = 0; final int id = fLeafList[outIndex].getParticleId(); for (; inIndex < fElemMapSize; inIndex++) { if (id == fElemMapId[inIndex]) break; } // If it was not in the list, then add it, if not the EOC node if (inIndex == fElemMapSize) { XSCMLeaf leaf = fLeafList[outIndex]; fElemMap[fElemMapSize] = leaf.getLeaf(); if (leaf instanceof XSCMRepeatingLeaf) { if (elemOccurenceMap == null) { elemOccurenceMap = new Occurence[fLeafCount]; } elemOccurenceMap[fElemMapSize] = new Occurence((XSCMRepeatingLeaf) leaf, fElemMapSize); } fElemMapType[fElemMapSize] = fLeafListType[outIndex]; fElemMapId[fElemMapSize] = id; // Init counters and bounds for a{n,m} algorithm int[] bounds = (int[]) leaf.getUserData(); if (bounds != null) { fElemMapCounter[fElemMapSize] = 0; fElemMapCounterLowerBound[fElemMapSize] = bounds[0]; fElemMapCounterUpperBound[fElemMapSize] = bounds[1]; } else { fElemMapCounter[fElemMapSize] = -1; fElemMapCounterLowerBound[fElemMapSize] = -1; fElemMapCounterUpperBound[fElemMapSize] = -1; } fElemMapSize++; } } // the last entry in the element map must be the EOC element. // remove it from the map. if (DEBUG) { if (fElemMapId[fElemMapSize-1] != -1) System.err.println("interal error in DFA: last element is not EOC."); } fElemMapSize--; /*** * Optimization(Jan, 2001); We sort fLeafList according to * elemIndex which is *uniquely* associated to each leaf. * We are *assuming* that each element appears in at least one leaf. **/ int[] fLeafSorter = new int[fLeafCount + fElemMapSize]; int fSortCount = 0; for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) { final int id = fElemMapId[elemIndex]; for (int leafIndex = 0; leafIndex < fLeafCount; leafIndex++) { if (id == fLeafList[leafIndex].getParticleId()) fLeafSorter[fSortCount++] = leafIndex; } fLeafSorter[fSortCount++] = -1; } /* Optimization(Jan, 2001) */ // // Next lets create some arrays, some that hold transient // information during the DFA build and some that are permament. // These are kind of sticky since we cannot know how big they will // get, but we don't want to use any Java collections because of // performance. // // Basically they will probably be about fLeafCount*2 on average, // but can be as large as 2^(fLeafCount*2), worst case. So we start // with fLeafCount*4 as a middle ground. This will be very unlikely // to ever have to expand, though it if does, the overhead will be // somewhat ugly. // int curArraySize = fLeafCount * 4; CMStateSet[] statesToDo = new CMStateSet[curArraySize]; fFinalStateFlags = new boolean[curArraySize]; fTransTable = new int[curArraySize][]; // // Ok we start with the initial set as the first pos set of the // head node (which is the seq node that holds the content model // and the EOC node.) // CMStateSet setT = fHeadNode.firstPos(); // // Init our two state flags. Basically the unmarked state counter // is always chasing the current state counter. When it catches up, // that means we made a pass through that did not add any new states // to the lists, at which time we are done. We could have used a // expanding array of flags which we used to mark off states as we // complete them, but this is easier though less readable maybe. // int unmarkedState = 0; int curState = 0; // // Init the first transition table entry, and put the initial state // into the states to do list, then bump the current state. // fTransTable[curState] = makeDefStateList(); statesToDo[curState] = setT; curState++; /* Optimization(Jan, 2001); This is faster for * a large content model such as, "(t001+|t002+|.... |t500+)". */ HashMap stateTable = new HashMap(); /* Optimization(Jan, 2001) */ // // Ok, almost done with the algorithm... We now enter the // loop where we go until the states done counter catches up with // the states to do counter. // while (unmarkedState < curState) { // // Get the first unmarked state out of the list of states to do. // And get the associated transition table entry. // setT = statesToDo[unmarkedState]; int[] transEntry = fTransTable[unmarkedState]; // Mark this one final if it contains the EOC state fFinalStateFlags[unmarkedState] = setT.getBit(EOCPos); // Bump up the unmarked state count, marking this state done unmarkedState++; // Loop through each possible input symbol in the element map CMStateSet newSet = null; /* Optimization(Jan, 2001) */ int sorterIndex = 0; /* Optimization(Jan, 2001) */ for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) { // // Build up a set of states which is the union of all of // the follow sets of DFA positions that are in the current // state. If we gave away the new set last time through then // create a new one. Otherwise, zero out the existing one. // if (newSet == null) newSet = new CMStateSet(fLeafCount); else newSet.zeroBits(); /* Optimization(Jan, 2001) */ int leafIndex = fLeafSorter[sorterIndex++]; while (leafIndex != -1) { // If this leaf index (DFA position) is in the current set... if (setT.getBit(leafIndex)) { // // If this leaf is the current input symbol, then we // want to add its follow list to the set of states to // transition to from the current state. // newSet.union(fFollowList[leafIndex]); } leafIndex = fLeafSorter[sorterIndex++]; } /* Optimization(Jan, 2001) */ // // If this new set is not empty, then see if its in the list // of states to do. If not, then add it. // if (!newSet.isEmpty()) { // // Search the 'states to do' list to see if this new // state set is already in there. // /* Optimization(Jan, 2001) */ Integer stateObj = (Integer)stateTable.get(newSet); int stateIndex = (stateObj == null ? curState : stateObj.intValue()); /* Optimization(Jan, 2001) */ // If we did not find it, then add it if (stateIndex == curState) { // // Put this new state into the states to do and init // a new entry at the same index in the transition // table. // statesToDo[curState] = newSet; fTransTable[curState] = makeDefStateList(); /* Optimization(Jan, 2001) */ stateTable.put(newSet, new Integer(curState)); /* Optimization(Jan, 2001) */ // We now have a new state to do so bump the count curState++; // // Null out the new set to indicate we adopted it. // This will cause the creation of a new set on the // next time around the loop. // newSet = null; } // // Now set this state in the transition table's entry // for this element (using its index), with the DFA // state we will move to from the current state when we // see this input element. // transEntry[elemIndex] = stateIndex; // Expand the arrays if we're full if (curState == curArraySize) { // // Yikes, we overflowed the initial array size, so // we've got to expand all of these arrays. So adjust // up the size by 50% and allocate new arrays. // final int newSize = (int)(curArraySize * 1.5); CMStateSet[] newToDo = new CMStateSet[newSize]; boolean[] newFinalFlags = new boolean[newSize]; int[][] newTransTable = new int[newSize][]; // Copy over all of the existing content System.arraycopy(statesToDo, 0, newToDo, 0, curArraySize); System.arraycopy(fFinalStateFlags, 0, newFinalFlags, 0, curArraySize); System.arraycopy(fTransTable, 0, newTransTable, 0, curArraySize); // Store the new array size curArraySize = newSize; statesToDo = newToDo; fFinalStateFlags = newFinalFlags; fTransTable = newTransTable; } } } } // // Fill in the occurence information for each looping state // if we're using counters. // if (elemOccurenceMap != null) { fCountingStates = new Occurence[curState]; for (int i = 0; i < curState; ++i) { int [] transitions = fTransTable[i]; for (int j = 0; j < transitions.length; ++j) { if (i == transitions[j]) { fCountingStates[i] = elemOccurenceMap[j]; break; } } } } // // And now we can say bye bye to the temp representation since we've // built the DFA. // if (DEBUG_VALIDATE_CONTENT) dumpTree(fHeadNode, 0); fHeadNode = null; fLeafList = null; fFollowList = null; fLeafListType = null; fElemMapId = null; } /** * Calculates the follow list of the current node. * * @param nodeCur The curent node. * * @exception RuntimeException Thrown if follow list cannot be calculated. */ private void calcFollowList(CMNode nodeCur) { // Recurse as required if (nodeCur.type() == XSModelGroupImpl.MODELGROUP_CHOICE) { // Recurse only calcFollowList(((XSCMBinOp)nodeCur).getLeft()); calcFollowList(((XSCMBinOp)nodeCur).getRight()); } else if (nodeCur.type() == XSModelGroupImpl.MODELGROUP_SEQUENCE) { // Recurse first calcFollowList(((XSCMBinOp)nodeCur).getLeft()); calcFollowList(((XSCMBinOp)nodeCur).getRight()); // // Now handle our level. We use our left child's last pos // set and our right child's first pos set, so go ahead and // get them ahead of time. // final CMStateSet last = ((XSCMBinOp)nodeCur).getLeft().lastPos(); final CMStateSet first = ((XSCMBinOp)nodeCur).getRight().firstPos(); // // Now, for every position which is in our left child's last set // add all of the states in our right child's first set to the // follow set for that position. // for (int index = 0; index < fLeafCount; index++) { if (last.getBit(index)) fFollowList[index].union(first); } } else if (nodeCur.type() == XSParticleDecl.PARTICLE_ZERO_OR_MORE || nodeCur.type() == XSParticleDecl.PARTICLE_ONE_OR_MORE) { // Recurse first calcFollowList(((XSCMUniOp)nodeCur).getChild()); // // Now handle our level. We use our own first and last position // sets, so get them up front. // final CMStateSet first = nodeCur.firstPos(); final CMStateSet last = nodeCur.lastPos(); // // For every position which is in our last position set, add all // of our first position states to the follow set for that // position. // for (int index = 0; index < fLeafCount; index++) { if (last.getBit(index)) fFollowList[index].union(first); } } else if (nodeCur.type() == XSParticleDecl.PARTICLE_ZERO_OR_ONE) { // Recurse only calcFollowList(((XSCMUniOp)nodeCur).getChild()); } } /** * Dumps the tree of the current node to standard output. * * @param nodeCur The current node. * @param level The maximum levels to output. * * @exception RuntimeException Thrown on error. */ private void dumpTree(CMNode nodeCur, int level) { for (int index = 0; index < level; index++) System.out.print(" "); int type = nodeCur.type(); switch(type ) { case XSModelGroupImpl.MODELGROUP_CHOICE: case XSModelGroupImpl.MODELGROUP_SEQUENCE: { if (type == XSModelGroupImpl.MODELGROUP_CHOICE) System.out.print("Choice Node "); else System.out.print("Seq Node "); if (nodeCur.isNullable()) System.out.print("Nullable "); System.out.print("firstPos="); System.out.print(nodeCur.firstPos().toString()); System.out.print(" lastPos="); System.out.println(nodeCur.lastPos().toString()); dumpTree(((XSCMBinOp)nodeCur).getLeft(), level+1); dumpTree(((XSCMBinOp)nodeCur).getRight(), level+1); break; } case XSParticleDecl.PARTICLE_ZERO_OR_MORE: case XSParticleDecl.PARTICLE_ONE_OR_MORE: case XSParticleDecl.PARTICLE_ZERO_OR_ONE: { System.out.print("Rep Node "); if (nodeCur.isNullable()) System.out.print("Nullable "); System.out.print("firstPos="); System.out.print(nodeCur.firstPos().toString()); System.out.print(" lastPos="); System.out.println(nodeCur.lastPos().toString()); dumpTree(((XSCMUniOp)nodeCur).getChild(), level+1); break; } case XSParticleDecl.PARTICLE_ELEMENT: { System.out.print ( "Leaf: (pos=" + ((XSCMLeaf)nodeCur).getPosition() + "), " + "(elemIndex=" + ((XSCMLeaf)nodeCur).getLeaf() + ") " ); if (nodeCur.isNullable()) System.out.print(" Nullable "); System.out.print("firstPos="); System.out.print(nodeCur.firstPos().toString()); System.out.print(" lastPos="); System.out.println(nodeCur.lastPos().toString()); break; } case XSParticleDecl.PARTICLE_WILDCARD: System.out.print("Any Node: "); System.out.print("firstPos="); System.out.print(nodeCur.firstPos().toString()); System.out.print(" lastPos="); System.out.println(nodeCur.lastPos().toString()); break; default: { throw new RuntimeException("ImplementationMessages.VAL_NIICM"); } } } /** * -1 is used to represent bad transitions in the transition table * entry for each state. So each entry is initialized to an all -1 * array. This method creates a new entry and initializes it. */ private int[] makeDefStateList() { int[] retArray = new int[fElemMapSize]; for (int index = 0; index < fElemMapSize; index++) retArray[index] = -1; return retArray; } /** Post tree build initialization. */ private void postTreeBuildInit(CMNode nodeCur) throws RuntimeException { // Set the maximum states on this node nodeCur.setMaxStates(fLeafCount); XSCMLeaf leaf = null; int pos = 0; // Recurse as required if (nodeCur.type() == XSParticleDecl.PARTICLE_WILDCARD) { leaf = (XSCMLeaf)nodeCur; pos = leaf.getPosition(); fLeafList[pos] = leaf; fLeafListType[pos] = XSParticleDecl.PARTICLE_WILDCARD; } else if ((nodeCur.type() == XSModelGroupImpl.MODELGROUP_CHOICE) || (nodeCur.type() == XSModelGroupImpl.MODELGROUP_SEQUENCE)) { postTreeBuildInit(((XSCMBinOp)nodeCur).getLeft()); postTreeBuildInit(((XSCMBinOp)nodeCur).getRight()); } else if (nodeCur.type() == XSParticleDecl.PARTICLE_ZERO_OR_MORE || nodeCur.type() == XSParticleDecl.PARTICLE_ONE_OR_MORE || nodeCur.type() == XSParticleDecl.PARTICLE_ZERO_OR_ONE) { postTreeBuildInit(((XSCMUniOp)nodeCur).getChild()); } else if (nodeCur.type() == XSParticleDecl.PARTICLE_ELEMENT) { // Put this node in the leaf list at the current index if its // a non-epsilon leaf. leaf = (XSCMLeaf)nodeCur; pos = leaf.getPosition(); fLeafList[pos] = leaf; fLeafListType[pos] = XSParticleDecl.PARTICLE_ELEMENT; } else { throw new RuntimeException("ImplementationMessages.VAL_NIICM"); } } /** * check whether this content violates UPA constraint. * * @param subGroupHandler the substitution group handler * @return true if this content model contains other or list wildcard */ public boolean checkUniqueParticleAttribution(SubstitutionGroupHandler subGroupHandler) throws XMLSchemaException { // Unique Particle Attribution // store the conflict results between any two elements in fElemMap // 0: not compared; -1: no conflict; 1: conflict // initialize the conflict table (all 0 initially) byte conflictTable[][] = new byte[fElemMapSize][fElemMapSize]; // for each state, check whether it has overlap transitions for (int i = 0; i < fTransTable.length && fTransTable[i] != null; i++) { for (int j = 0; j < fElemMapSize; j++) { for (int k = j+1; k < fElemMapSize; k++) { if (fTransTable[i][j] != -1 && fTransTable[i][k] != -1) { if (conflictTable[j][k] == 0) { if (XSConstraints.overlapUPA (fElemMap[j], fElemMap[k], subGroupHandler)) { if (fCountingStates != null) { Occurence o = fCountingStates[i]; // If "i" is a counting state and exactly one of the transitions // loops back to "i" then the two particles do not overlap if // minOccurs == maxOccurs. if (o != null && fTransTable[i][j] == i ^ fTransTable[i][k] == i && o.minOccurs == o.maxOccurs) { conflictTable[j][k] = (byte) -1; continue; } } conflictTable[j][k] = (byte) 1; } else { conflictTable[j][k] = (byte) -1; } } } } } } // report all errors for (int i = 0; i < fElemMapSize; i++) { for (int j = 0; j < fElemMapSize; j++) { if (conflictTable[i][j] == 1) { //errors.newError("cos-nonambig", new Object[]{fElemMap[i].toString(), // fElemMap[j].toString()}); // REVISIT: do we want to report all errors? or just one? throw new XMLSchemaException("cos-nonambig", new Object[]{fElemMap[i].toString(), fElemMap[j].toString()}); } } } // if there is a other or list wildcard, we need to check this CM // again, if this grammar is cached. for (int i = 0; i < fElemMapSize; i++) { if (fElemMapType[i] == XSParticleDecl.PARTICLE_WILDCARD) { XSWildcardDecl wildcard = (XSWildcardDecl)fElemMap[i]; if (wildcard.fType == XSWildcardDecl.NSCONSTRAINT_LIST || wildcard.fType == XSWildcardDecl.NSCONSTRAINT_NOT) { return true; } } } return false; } /** * Check which elements are valid to appear at this point. This method also * works if the state is in error, in which case it returns what should * have been seen. * * @param state the current state * @return a Vector whose entries are instances of * either XSWildcardDecl or XSElementDecl. */ public Vector whatCanGoHere(int[] state) { int curState = state[0]; if (curState < 0) curState = state[1]; Occurence o = (fCountingStates != null) ? fCountingStates[curState] : null; int count = state[2]; Vector ret = new Vector(); for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) { int nextState = fTransTable[curState][elemIndex]; if (nextState != -1) { if (o != null) { if (curState == nextState) { // Do not include transitions which loop back to the // current state if we've looped the maximum number // of times or greater. if (count >= o.maxOccurs && o.maxOccurs != SchemaSymbols.OCCURRENCE_UNBOUNDED) { continue; } } // Do not include transitions which advance past the // current state if we have not looped enough times. else if (count < o.minOccurs) { continue; } } ret.addElement(fElemMap[elemIndex]); } } return ret; } /** * Used by constant space algorithm for a{n,m} for n > 1 and * m <= unbounded. Called by a validator if validation of * countent model succeeds after subsuming a{n,m} to a* * (or a+) to check the n and m bounds. * Returns null if validation of bounds is * successful. Returns a list of strings with error info * if not. Even entries in list returned are error codes * (used to look up properties) and odd entries are parameters * to be passed when formatting error message. Each parameter * is associated with the error code that preceeds it in * the list. */ public ArrayList checkMinMaxBounds() { ArrayList result = null; for (int elemIndex = 0; elemIndex < fElemMapSize; elemIndex++) { int count = fElemMapCounter[elemIndex]; if (count == -1) { continue; } final int minOccurs = fElemMapCounterLowerBound[elemIndex]; final int maxOccurs = fElemMapCounterUpperBound[elemIndex]; if (count < minOccurs) { if (result == null) result = new ArrayList(); result.add("cvc-complex-type.2.4.b"); result.add("{" + fElemMap[elemIndex] + "}"); } if (maxOccurs != -1 && count > maxOccurs) { if (result == null) result = new ArrayList(); result.add("cvc-complex-type.2.4.e"); result.add("{" + fElemMap[elemIndex] + "}"); } } return result; } } // class DFAContentModel





© 2015 - 2024 Weber Informatics LLC | Privacy Policy