All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.xerces.impl.xs.models.CMBuilder Maven / Gradle / Ivy

Go to download

Xerces2 is the next generation of high performance, fully compliant XML parsers in the Apache Xerces family. This new version of Xerces introduces the Xerces Native Interface (XNI), a complete framework for building parser components and configurations that is extremely modular and easy to program. The Apache Xerces2 parser is the reference implementation of XNI but other parser components, configurations, and parsers can be written using the Xerces Native Interface. For complete design and implementation documents, refer to the XNI Manual. Xerces2 is a fully conforming XML Schema 1.0 processor. A partial experimental implementation of the XML Schema 1.1 Structures and Datatypes Working Drafts (December 2009) and an experimental implementation of the XML Schema Definition Language (XSD): Component Designators (SCD) Candidate Recommendation (January 2010) are provided for evaluation. For more information, refer to the XML Schema page. Xerces2 also provides a complete implementation of the Document Object Model Level 3 Core and Load/Save W3C Recommendations and provides a complete implementation of the XML Inclusions (XInclude) W3C Recommendation. It also provides support for OASIS XML Catalogs v1.1. Xerces2 is able to parse documents written according to the XML 1.1 Recommendation, except that it does not yet provide an option to enable normalization checking as described in section 2.13 of this specification. It also handles namespaces according to the XML Namespaces 1.1 Recommendation, and will correctly serialize XML 1.1 documents if the DOM level 3 load/save APIs are in use.

There is a newer version: 2.12.2
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.xerces.impl.xs.models;

import org.apache.xerces.impl.dtd.models.CMNode;
import org.apache.xerces.impl.xs.SchemaSymbols;
import org.apache.xerces.impl.xs.XSComplexTypeDecl;
import org.apache.xerces.impl.xs.XSDeclarationPool;
import org.apache.xerces.impl.xs.XSElementDecl;
import org.apache.xerces.impl.xs.XSModelGroupImpl;
import org.apache.xerces.impl.xs.XSParticleDecl;

/**
 * This class constructs content models for a given grammar.
 *
 * @xerces.internal 
 *
 * @author Elena Litani, IBM
 * @author Sandy Gao, IBM
 *
 * @version $Id: CMBuilder.java 573322 2007-09-06 16:48:47Z peterjm $
 */
public class CMBuilder {

    // REVISIT: should update the decl pool to cache XSCM objects too
    // NOTE: this class only stores the pool (see setDeclPool); nothing here reads it yet.
    private XSDeclarationPool fDeclPool = null;

    // It never changes, so a static member is good enough
    private static final XSEmptyCM fEmptyCM = new XSEmptyCM();

    // needed for DFA construction: position handed to the next leaf node
    private int fLeafCount;
    // needed for UPA: id handed to the next element/wildcard particle
    private int fParticleCount;
    // Factory to create Bin, Uni, Leaf nodes
    private final CMNodeFactory fNodeFactory;

    /**
     * Creates a builder that obtains all of its syntax tree nodes from the
     * given factory.
     *
     * @param nodeFactory factory used to create leaf, unary and binary nodes
     */
    public CMBuilder(CMNodeFactory nodeFactory) {
        fNodeFactory = nodeFactory;
    }

    /**
     * Sets the declaration pool associated with this builder.
     *
     * @param declPool the declaration pool; may be <code>null</code>
     */
    public void setDeclPool(XSDeclarationPool declPool) {
        fDeclPool = declPool;
    }

    /**
     * Get the content model for a given complex type.
     *
     * @param typeDecl  the complex type to build a content model for
     * @param forUPA    if <code>true</code>, and the expanded (non-compact)
     *                  DFA syntax tree is built, large minOccurs/maxOccurs
     *                  values are reduced to small ones to make processing
     *                  faster; it has no effect on "all" content models
     * @return          a content model validator, or <code>null</code> if the
     *                  type has simple or empty content
     */
    public XSCMValidator getContentModel(XSComplexTypeDecl typeDecl, boolean forUPA) {

        // for complex type with empty or simple content,
        // there is no content model validator
        short contentType = typeDecl.getContentType();
        if (contentType == XSComplexTypeDecl.CONTENTTYPE_SIMPLE ||
            contentType == XSComplexTypeDecl.CONTENTTYPE_EMPTY) {
            return null;
        }

        XSParticleDecl particle = (XSParticleDecl)typeDecl.getParticle();

        // if the content is element only or mixed, but no particle
        // is defined, return the empty content model
        if (particle == null) {
            return fEmptyCM;
        }

        // if the content model contains "all" model group,
        // we create an "all" content model, otherwise a DFA content model
        XSCMValidator cmValidator = null;
        if (particle.fType == XSParticleDecl.PARTICLE_MODELGROUP &&
            ((XSModelGroupImpl)particle.fValue).fCompositor == XSModelGroupImpl.MODELGROUP_ALL) {
            cmValidator = createAllCM(particle);
        }
        else {
            cmValidator = createDFACM(particle, forUPA);
        }

        // now we are through building the content model and have successfully
        // passed the node count check (if set by the application)
        fNodeFactory.resetNodeCount();

        // if the validator returned is null, it means there is nothing in
        // the content model, so we return the empty content model.
        if (cmValidator == null) {
            cmValidator = fEmptyCM;
        }

        return cmValidator;
    }

    /**
     * Creates an "all" content model for a particle whose term is an
     * <code>all</code> model group.
     *
     * @param particle  the particle holding the "all" model group
     * @return          the "all" content model, or <code>null</code> if the
     *                  particle has maxOccurs == 0
     */
    XSCMValidator createAllCM(XSParticleDecl particle) {
        if (particle.fMaxOccurs == 0) {
            return null;
        }

        // get the model group, and add all children of it to the content model
        XSModelGroupImpl group = (XSModelGroupImpl)particle.fValue;
        // create an all content model. the first parameter indicates whether
        // the <all> itself is optional
        XSAllCM allContent = new XSAllCM(particle.fMinOccurs == 0, group.fParticleCount);
        for (int i = 0; i < group.fParticleCount; i++) {
            // add the element decl to the all content model; the flag tells
            // whether that element is optional
            allContent.addElement((XSElementDecl)group.fParticles[i].fValue,
                                  group.fParticles[i].fMinOccurs == 0);
        }
        return allContent;
    }

    /**
     * Creates a DFA based content model for the given particle.
     *
     * @param particle  the root particle of the content model
     * @param forUPA    passed on to {@link #buildSyntaxTree}; not used when
     *                  the compact syntax tree is built
     * @return          the DFA content model, or <code>null</code> if the
     *                  particle yields no syntax tree
     */
    XSCMValidator createDFACM(XSParticleDecl particle, boolean forUPA) {
        // leaf positions and particle ids are numbered from zero for each model
        fLeafCount = 0;
        fParticleCount = 0;
        // convert particle tree to CM tree
        CMNode node = useRepeatingLeafNodes(particle)
                ? buildCompactSyntaxTree(particle)
                : buildSyntaxTree(particle, forUPA);
        if (node == null) {
            return null;
        }
        // build DFA content model from the CM tree
        return new XSDFACM(node, fLeafCount);
    }

    // 1. convert particle tree to CM tree:
    // 2. expand all occurrence values: a{n, unbounded} -> a, a, ..., a+
    //                                  a{n, m} -> a, a, ..., a?, a?, ...
    // 3. convert model groups (a, b, c, ...) or (a | b | c | ...) to
    //    binary tree: (((a,b),c),...) or (((a|b)|c)|...)
    // 4. make sure each leaf node (XSCMLeaf) has a distinct position
    //
    // Returns null when the particle contributes nothing to the content model
    // (e.g. a model group whose children are all empty, or a particle whose
    // occurrence range expands to nothing).
    private CMNode buildSyntaxTree(XSParticleDecl particle, boolean forUPA) {

        int maxOccurs = particle.fMaxOccurs;
        int minOccurs = particle.fMinOccurs;

        boolean compactedForUPA = false;
        if (forUPA) {
            // When doing UPA, we reduce the size of the minOccurs/maxOccurs values to make
            // processing the DFA faster.  For UPA the exact values don't matter.
            if (minOccurs > 1) {
                if (maxOccurs > minOccurs || particle.getMaxOccursUnbounded()) {
                    minOccurs = 1;
                    compactedForUPA = true;
                }
                else { // maxOccurs == minOccurs
                    minOccurs = 2;
                    compactedForUPA = true;
                }
            }
            if (maxOccurs > 1) {
                maxOccurs = 2;
                compactedForUPA = true;
            }
        }

        short type = particle.fType;
        CMNode nodeRet = null;

        if ((type == XSParticleDecl.PARTICLE_WILDCARD) ||
            (type == XSParticleDecl.PARTICLE_ELEMENT)) {
            // (task 1) element and wildcard particles should be converted to
            // leaf nodes
            // REVISIT: Make a clone of the leaf particle, so that if there
            // are two references to the same group, we have two different
            // leaf particles for the same element or wildcard decl.
            // This is useful for checking UPA.
            nodeRet = fNodeFactory.getCMLeafNode(particle.fType, particle.fValue, fParticleCount++, fLeafCount++);
            // (task 2) expand occurrence values
            nodeRet = expandContentModel(nodeRet, minOccurs, maxOccurs);
            if (nodeRet != null) {
                nodeRet.setIsCompactUPAModel(compactedForUPA);
            }
        }
        else if (type == XSParticleDecl.PARTICLE_MODELGROUP) {
            // (task 1,3) convert model groups to binary trees
            XSModelGroupImpl group = (XSModelGroupImpl)particle.fValue;
            CMNode temp = null;
            // when the model group is a choice of more than one particle, but
            // only one of the particles is not empty, (for example
            // <choice>
            //   <sequence/>
            //   <element name="e"/>
            // </choice>
            // ) we can't return that one particle ("e"). instead, we should
            // treat such particle as optional ("e?").
            // the following int variable keeps track of the number of non-empty children
            int count = 0;
            for (int i = 0; i < group.fParticleCount; i++) {
                // first convert each child to a CM tree
                temp = buildSyntaxTree(group.fParticles[i], forUPA);
                // then combine them using binary operation
                if (temp != null) {
                    compactedForUPA |= temp.isCompactedForUPA();
                    ++count;
                    if (nodeRet == null) {
                        nodeRet = temp;
                    }
                    else {
                        nodeRet = fNodeFactory.getCMBinOpNode(group.fCompositor, nodeRet, temp);
                    }
                }
            }
            // (task 2) expand occurrence values
            if (nodeRet != null) {
                // when the group is "choice" and the group has one or more empty children,
                // we need to create a zero-or-one (optional) node for the non-empty particles.
                if (group.fCompositor == XSModelGroupImpl.MODELGROUP_CHOICE && count < group.fParticleCount) {
                    nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ZERO_OR_ONE, nodeRet);
                }
                nodeRet = expandContentModel(nodeRet, minOccurs, maxOccurs);
                // expandContentModel() returns null when the occurrence range
                // expands to nothing (minOccurs == 0 and maxOccurs == 0), so
                // guard the call exactly as the leaf branch above does.
                if (nodeRet != null) {
                    nodeRet.setIsCompactUPAModel(compactedForUPA);
                }
            }
        }

        return nodeRet;
    }

    // 2. expand all occurrence values: a{n, unbounded} -> a, a, ..., a+
    //                                  a{n, m} -> a, a, ..., a?, a?, ...
    // 4. make sure each leaf node (XSCMLeaf) has a distinct position
    //
    // Returns null when there is nothing to expand to, i.e. when
    // minOccurs == 0 and maxOccurs == 0.
    private CMNode expandContentModel(CMNode node,
                                      int minOccurs, int maxOccurs) {

        CMNode nodeRet = null;

        if (minOccurs == 1 && maxOccurs == 1) {
            // exactly one: the node is used as is
            nodeRet = node;
        }
        else if (minOccurs == 0 && maxOccurs == 1) {
            // zero or one
            nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ZERO_OR_ONE, node);
        }
        else if (minOccurs == 0 && maxOccurs == SchemaSymbols.OCCURRENCE_UNBOUNDED) {
            // zero or more
            nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ZERO_OR_MORE, node);
        }
        else if (minOccurs == 1 && maxOccurs == SchemaSymbols.OCCURRENCE_UNBOUNDED) {
            // one or more
            nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ONE_OR_MORE, node);
        }
        else if (maxOccurs == SchemaSymbols.OCCURRENCE_UNBOUNDED) {
            // => a,a,..,a+
            // create a+ node first, then put minOccurs-1 a's in front of it
            // for the first time "node" is used, we don't need to make a copy
            // and for other references to node, we make copies
            nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ONE_OR_MORE, node);
            // (task 4) we need to call copyNode here, so that we append
            // an entire new copy of the node (a subtree). this is to ensure
            // all leaf nodes have distinct position
            // we know that minOccurs > 1
            nodeRet = fNodeFactory.getCMBinOpNode(XSModelGroupImpl.MODELGROUP_SEQUENCE,
                                                  multiNodes(node, minOccurs-1, true), nodeRet);
        }
        else {
            // {n,m} => a,a,a,...(a),(a),...
            // first n a's, then m-n a?'s.
            // copyNode is called, for the same reason as above
            if (minOccurs > 0) {
                nodeRet = multiNodes(node, minOccurs, false);
            }
            if (maxOccurs > minOccurs) {
                node = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ZERO_OR_ONE, node);
                if (nodeRet == null) {
                    // no mandatory part: "node" itself has not been used yet,
                    // so the first optional occurrence needs no copy
                    nodeRet = multiNodes(node, maxOccurs-minOccurs, false);
                }
                else {
                    nodeRet = fNodeFactory.getCMBinOpNode(XSModelGroupImpl.MODELGROUP_SEQUENCE,
                                                          nodeRet, multiNodes(node, maxOccurs-minOccurs, true));
                }
            }
        }

        return nodeRet;
    }

    // Builds a sequence of "num" occurrences of "node", splitting the count in
    // halves so that the resulting binary tree stays balanced.
    // "copyFirst" tells whether the first (leftmost) occurrence must be a copy
    // of "node" or may be "node" itself; all other occurrences are copies.
    // Returns null when num is 0. Callers only pass non-negative counts.
    private CMNode multiNodes(CMNode node, int num, boolean copyFirst) {
        if (num == 0) {
            return null;
        }
        if (num == 1) {
            return copyFirst ? copyNode(node) : node;
        }
        int num1 = num/2;
        return fNodeFactory.getCMBinOpNode(XSModelGroupImpl.MODELGROUP_SEQUENCE,
                                           multiNodes(node, num1, copyFirst),
                                           multiNodes(node, num-num1, true));
    }

    // 4. make sure each leaf node (XSCMLeaf) has a distinct position
    //
    // Recursively copies the subtree rooted at "node". Copied leaves keep the
    // particle id of the original leaf but receive a new position.
    // A node of any other type is returned unchanged.
    private CMNode copyNode(CMNode node) {
        int type = node.type();
        // for choice or sequence, copy the two subtrees, and combine them
        if (type == XSModelGroupImpl.MODELGROUP_CHOICE ||
            type == XSModelGroupImpl.MODELGROUP_SEQUENCE) {
            XSCMBinOp bin = (XSCMBinOp)node;
            node = fNodeFactory.getCMBinOpNode(type, copyNode(bin.getLeft()),
                                               copyNode(bin.getRight()));
        }
        // for ?+*, copy the subtree, and put it in a new ?+* node
        else if (type == XSParticleDecl.PARTICLE_ZERO_OR_MORE ||
                 type == XSParticleDecl.PARTICLE_ONE_OR_MORE ||
                 type == XSParticleDecl.PARTICLE_ZERO_OR_ONE) {
            XSCMUniOp uni = (XSCMUniOp)node;
            node = fNodeFactory.getCMUniOpNode(type, copyNode(uni.getChild()));
        }
        // for element/wildcard (leaf), make a new leaf node,
        // with a distinct position
        else if (type == XSParticleDecl.PARTICLE_ELEMENT ||
                 type == XSParticleDecl.PARTICLE_WILDCARD) {
            XSCMLeaf leaf = (XSCMLeaf)node;
            node = fNodeFactory.getCMLeafNode(leaf.type(), leaf.getLeaf(), leaf.getParticleId(), fLeafCount++);
        }

        return node;
    }

    // A special version of buildSyntaxTree() which builds a compact syntax tree
    // containing compound leaf nodes which carry occurrence information. This method
    // for building the syntax tree is chosen over buildSyntaxTree() when
    // useRepeatingLeafNodes() returns true.
    // Returns null when the particle contributes nothing to the content model.
    private CMNode buildCompactSyntaxTree(XSParticleDecl particle) {
        int maxOccurs = particle.fMaxOccurs;
        int minOccurs = particle.fMinOccurs;
        short type = particle.fType;
        CMNode nodeRet = null;

        if ((type == XSParticleDecl.PARTICLE_WILDCARD) ||
            (type == XSParticleDecl.PARTICLE_ELEMENT)) {
            return buildCompactSyntaxTree2(particle, minOccurs, maxOccurs);
        }
        else if (type == XSParticleDecl.PARTICLE_MODELGROUP) {
            XSModelGroupImpl group = (XSModelGroupImpl)particle.fValue;
            if (group.fParticleCount == 1 && (minOccurs != 1 || maxOccurs != 1)) {
                // a repeating group with a single child: the group's occurrence
                // values are applied directly to that child
                return buildCompactSyntaxTree2(group.fParticles[0], minOccurs, maxOccurs);
            }
            else {
                CMNode temp = null;

                // when the model group is a choice of more than one particle, but
                // only one of the particles is not empty, (for example
                // <choice>
                //   <sequence/>
                //   <element name="e"/>
                // </choice>
                // ) we can't return that one particle ("e"). instead, we should
                // treat such particle as optional ("e?").
                // the following int variable keeps track of the number of non-empty children
                int count = 0;
                for (int i = 0; i < group.fParticleCount; i++) {
                    // first convert each child to a CM tree
                    temp = buildCompactSyntaxTree(group.fParticles[i]);
                    // then combine them using binary operation
                    if (temp != null) {
                        ++count;
                        if (nodeRet == null) {
                            nodeRet = temp;
                        }
                        else {
                            nodeRet = fNodeFactory.getCMBinOpNode(group.fCompositor, nodeRet, temp);
                        }
                    }
                }
                if (nodeRet != null) {
                    // when the group is "choice" and the group has one or more empty children,
                    // we need to create a zero-or-one (optional) node for the non-empty particles.
                    if (group.fCompositor == XSModelGroupImpl.MODELGROUP_CHOICE && count < group.fParticleCount) {
                        nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ZERO_OR_ONE, nodeRet);
                    }
                }
            }
        }
        return nodeRet;
    }

    // Converts an element or wildcard particle to a leaf node, using the given
    // occurrence values (which may come from an enclosing model group rather
    // than from the particle itself). Repeating particles are wrapped in a
    // CMUniOpNode. Never returns null.
    private CMNode buildCompactSyntaxTree2(XSParticleDecl particle, int minOccurs, int maxOccurs) {
        CMNode nodeRet = null;
        if (minOccurs == 1 && maxOccurs == 1) {
            // exactly one: a plain leaf
            nodeRet = fNodeFactory.getCMLeafNode(particle.fType, particle.fValue, fParticleCount++, fLeafCount++);
        }
        else if (minOccurs == 0 && maxOccurs == 1) {
            // zero or one
            nodeRet = fNodeFactory.getCMLeafNode(particle.fType, particle.fValue, fParticleCount++, fLeafCount++);
            nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ZERO_OR_ONE, nodeRet);
        }
        else if (minOccurs == 0 && maxOccurs == SchemaSymbols.OCCURRENCE_UNBOUNDED) {
            // zero or more
            nodeRet = fNodeFactory.getCMLeafNode(particle.fType, particle.fValue, fParticleCount++, fLeafCount++);
            nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ZERO_OR_MORE, nodeRet);
        }
        else if (minOccurs == 1 && maxOccurs == SchemaSymbols.OCCURRENCE_UNBOUNDED) {
            // one or more
            nodeRet = fNodeFactory.getCMLeafNode(particle.fType, particle.fValue, fParticleCount++, fLeafCount++);
            nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ONE_OR_MORE, nodeRet);
        }
        else {
            // {n,m}: Instead of expanding this out, create a compound leaf node which carries the
            // occurrence information and wrap it in the appropriate CMUniOpNode.
            nodeRet = fNodeFactory.getCMRepeatingLeafNode(particle.fType, particle.fValue, minOccurs, maxOccurs, fParticleCount++, fLeafCount++);
            if (minOccurs == 0) {
                nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ZERO_OR_MORE, nodeRet);
            }
            else {
                nodeRet = fNodeFactory.getCMUniOpNode(XSParticleDecl.PARTICLE_ONE_OR_MORE, nodeRet);
            }
        }
        return nodeRet;
    }

    // This method checks if this particle can be transformed into a compact syntax
    // tree containing compound leaf nodes which carry occurrence information. Currently
    // it returns true if each model group has minOccurs/maxOccurs == 1 or
    // contains only one element/wildcard particle with minOccurs/maxOccurs == 1.
    // A repeating model group without any children also qualifies, and so does
    // any particle that is not a model group.
    private boolean useRepeatingLeafNodes(XSParticleDecl particle) {
        int maxOccurs = particle.fMaxOccurs;
        int minOccurs = particle.fMinOccurs;
        short type = particle.fType;

        if (type == XSParticleDecl.PARTICLE_MODELGROUP) {
            XSModelGroupImpl group = (XSModelGroupImpl) particle.fValue;
            if (minOccurs != 1 || maxOccurs != 1) {
                // a repeating group is only supported when it wraps a single
                // non-repeating element/wildcard, or when it is empty
                if (group.fParticleCount == 1) {
                    XSParticleDecl particle2 = (XSParticleDecl) group.fParticles[0];
                    short type2 = particle2.fType;
                    return ((type2 == XSParticleDecl.PARTICLE_ELEMENT ||
                            type2 == XSParticleDecl.PARTICLE_WILDCARD) &&
                            particle2.fMinOccurs == 1 &&
                            particle2.fMaxOccurs == 1);
                }
                return (group.fParticleCount == 0);
            }
            // a non-repeating group qualifies only if all of its children do
            for (int i = 0; i < group.fParticleCount; ++i) {
                if (!useRepeatingLeafNodes(group.fParticles[i])) {
                    return false;
                }
            }
        }
        return true;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy