All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.xmlbeans.impl.common.XPath Maven / Gradle / Ivy

Go to download

The Apache Commons Codec package contains simple encoder and decoders for various formats such as Base64 and Hexadecimal. In addition to these widely used encoders and decoders, the codec package also maintains a collection of phonetic encoding utilities.

The newest version!
/*   Copyright 2004 The Apache Software Foundation
 *
 *   Licensed under the Apache License, Version 2.0 (the "License");
 *   you may not use this file except in compliance with the License.
 *   You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 *   Unless required by applicable law or agreed to in writing, software
 *   distributed under the License is distributed on an "AS IS" BASIS,
 *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *   See the License for the specific language governing permissions and
 *  limitations under the License.
 */

package org.apache.xmlbeans.impl.common;

import javax.xml.namespace.QName;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.HashMap;
import org.apache.xmlbeans.XmlError;
import org.apache.xmlbeans.XmlException;
import org.apache.xmlbeans.impl.common.XMLChar;


public class XPath
{
    public static class XPathCompileException extends XmlException
    {
        XPathCompileException ( XmlError err )
        {
            super( err.toString(), null, err );
        }
    }

    //
    //
    //

    public static class ExecutionContext
    {
        public ExecutionContext ( )
        {
            _stack = new ArrayList();
        }

        public static final int HIT     = 0x1;
        public static final int DESCEND = 0x2;
        public static final int ATTRS   = 0x4;
        
        public final void init ( XPath xpath )
        {
            if (_xpath != xpath)
            {
                _xpath = xpath;
                
                _paths = new PathContext [ xpath._selector._paths.length ];
                
                for ( int i = 0 ; i < _paths.length ; i++ )
                    _paths[ i ] = new PathContext();
            }

            _stack.clear();

            for ( int i = 0 ; i < _paths.length ; i++ )
                _paths[ i ].init( xpath._selector._paths[ i ] );
        }

        public final int start ( )
        {
            int result = 0;
            
            for ( int i = 0 ; i < _paths.length ; i++ )
                result |= _paths[ i ].start();

            return result;
        }
        
        public final int element ( QName name )
        {
            assert name != null;
            
            _stack.add( name );
            
            int result = 0;
            
            for ( int i = 0 ; i < _paths.length ; i++ )
                result |= _paths[ i ].element( name );

            return result;
        }
        
        public final boolean attr ( QName name )
        {
            boolean hit = false;
            
            for ( int i = 0 ; i < _paths.length ; i++ )
                hit = hit | _paths[ i ].attr( name );

            return hit;
        }

        public final void end ( )
        {
            _stack.remove( _stack.size() - 1 );
            
            for ( int i = 0 ; i < _paths.length ; i++ )
                _paths[ i ].end();
        }
        
        private final class PathContext
        {
            PathContext ( )
            {
                _prev = new ArrayList();
            }
            
            void init ( Step steps )
            {
                _curr = steps;
                _prev.clear();
            }

            private QName top ( int i )
            {
                return (QName) ExecutionContext.this._stack.get( _stack.size() - 1 - i );
            }

            // goes back to the begining of the sequence since last // wildcard
            private void backtrack ( )
            {
                assert _curr != null;
                
                if (_curr._hasBacktrack)
                {   // _backtrack seems to be a pointer to the step that follows a // wildcard
                    // ex: for .//b/c/d steps c and d should backtrack to b in case there isn't a match 
                    _curr = _curr._backtrack;
                    return;
                }

                assert !_curr._deep;

                _curr = _curr._prev;

                search: for ( ; !_curr._deep ; _curr = _curr._prev )
                {
                    int t = 0;
                    
                    for ( Step s = _curr ; !s._deep ; s = s._prev )
                    {
                        if (!s.match( top( t++ )))
                            continue search;
                    }

                    break;
                }
            }
            
            int start ( )
            {
                assert _curr != null;
                assert _curr._prev == null;

                if (_curr._name != null)
                    return _curr._flags;

                // If the steps consist on only a terminator, then the path can
                // only be '.'.  In this case, we get a hit, but there is
                // nothing else to match.  No need to backtrack.

                _curr = null;

                return HIT;
            }
            
            int element ( QName name )
            {
                //System.out.println("  Path.element: " + name);
                _prev.add( _curr );

                if (_curr == null)
                    return 0;

                assert _curr._name != null;

                if (!_curr._attr && _curr.match( name ))
                {
                    if ((_curr = _curr._next)._name != null)
                        return _curr._flags;
                    
                    backtrack();
                    
                    //System.out.println("    element - HIT " + _curr._flags);
                    return _curr == null ? HIT : HIT | _curr._flags;
                }

                for ( ; ; )
                {
                    backtrack();

                    if (_curr == null)
                        return 0;

                    if (_curr.match( name ))
                    {
                        _curr = _curr._next;
                        break;
                    }

                    if (_curr._deep)
                        break;
                }
                
                return _curr._flags;
            }
            
            boolean attr ( QName name )
            {
                return _curr != null && _curr._attr && _curr.match( name );
            }

            void end ( )
            {
                //System.out.println("  Path.end ");
                _curr = (Step) _prev.remove( _prev.size() - 1 );
            }
            
            private Step _curr;
            private List _prev;
        }

        private XPath         _xpath;
        private ArrayList     _stack;
        private PathContext[] _paths;
    }

    //
    //
    //

    public static XPath compileXPath ( String xpath )
        throws XPathCompileException
    {
        return compileXPath( xpath, "$this", null );
    }
    
    public static XPath compileXPath ( String xpath, String currentNodeVar )
        throws XPathCompileException
    {
        return compileXPath( xpath, currentNodeVar, null );
    }

    public static XPath compileXPath ( String xpath, Map namespaces )
            throws XPathCompileException
    {
        return compileXPath( xpath, "$this", namespaces );
    }
    
    public static XPath compileXPath (
        String xpath, String currentNodeVar, Map namespaces )
            throws XPathCompileException
    {
        return
            new CompilationContext( namespaces, currentNodeVar ).
                compile( xpath );
    }

    private static class CompilationContext
    {
        CompilationContext ( Map namespaces, String currentNodeVar )
        {
            assert
                _currentNodeVar == null ||
                _currentNodeVar.startsWith( "$" );

            if (currentNodeVar == null)
                _currentNodeVar = "$this";
            else
                _currentNodeVar = currentNodeVar;

            _namespaces = new HashMap();
            
            _externalNamespaces =
                namespaces == null ? new HashMap() : namespaces;
        }

        XPath compile ( String expr ) throws XPathCompileException
        {
            _offset = 0;
            _line = 1;
            _column = 1;
            _expr = expr;

            return tokenizeXPath();
        }
        
        int currChar ( )
        {
            return currChar( 0 );
        }
        
        int currChar ( int offset )
        {
            return
                _offset + offset >= _expr.length()
                    ? -1
                    : _expr.charAt( _offset + offset );
        }
        
        void advance ( )
        {
            if (_offset < _expr.length())
            {
                char ch = _expr.charAt( _offset );
                
                _offset++;
                _column++;

                if (ch == '\r' || ch == '\n')
                {
                    _line++;
                    _column = 1;

                    if (_offset + 1 < _expr.length())
                    {
                        char nextCh = _expr.charAt( _offset + 1 );

                        if ((nextCh == '\r' || nextCh == '\n') && ch != nextCh)
                            _offset++;
                    }
                }
            }
        }

        void advance ( int count )
        {
            assert count >= 0;
            
            while ( count-- > 0 )
                advance();
        }
                
        boolean isWhitespace ( )
        {
            return isWhitespace( 0 );
        }
        
        boolean isWhitespace ( int offset )
        {
            int ch = currChar( offset );
            return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r';
        }

        boolean isNCNameStart ( )
        {
            return
                currChar() == -1
                    ? false :
                    XMLChar.isNCNameStart( currChar() );
        }
        
        boolean isNCName ( )
        {
            return
                currChar() == -1
                    ? false :
                    XMLChar.isNCName( currChar() );
        }
        
        boolean startsWith ( String s )
        {
            return startsWith( s, 0 );
        }
        
        boolean startsWith ( String s, int offset )
        {
            if (_offset + offset >= _expr.length())
                return false;
            
            return _expr.startsWith( s, _offset + offset );
        }

        private XPathCompileException newError ( String msg )
        {
            XmlError err =
                XmlError.forLocation(
                    msg, XmlError.SEVERITY_ERROR, null,
                    _line, _column, _offset );
                                     
            return new XPathCompileException( err );
        }

        String lookupPrefix ( String prefix ) throws XPathCompileException
        {
            if (_namespaces.containsKey( prefix ))
                return (String) _namespaces.get( prefix );
            
            if (_externalNamespaces.containsKey( prefix ))
                return (String) _externalNamespaces.get( prefix );

            if (prefix.equals( "xml" ))
                  return "http://www.w3.org/XML/1998/namespace";
            
            if (prefix.equals( "xs" ))
                  return "http://www.w3.org/2001/XMLSchema";
            
            if (prefix.equals( "xsi" ))
                  return "http://www.w3.org/2001/XMLSchema-instance";
            
            if (prefix.equals( "fn" ))
                  return "http://www.w3.org/2002/11/xquery-functions";

            if (prefix.equals( "xdt" ))
                  return "http://www.w3.org/2003/11/xpath-datatypes";

            if (prefix.equals( "local" ))
                  return "http://www.w3.org/2003/11/xquery-local-functions";

            throw newError( "Undefined prefix: " + prefix );
        }

        private boolean parseWhitespace ( ) throws XPathCompileException
        {
            boolean sawSpace = false;
            
            while ( isWhitespace() )
            {
                advance();
                sawSpace = true;
            }

            return sawSpace;
        }
        
        //
        // Tokenizing will consume whitespace followed by the tokens, separated
        // by whitespace.  The whitespace following the last token is not
        // consumed.
        //
        
        private boolean tokenize ( String s )
        {
            assert s.length() > 0;
            
            int offset = 0;

            while ( isWhitespace( offset ) )
                offset++;
            
            if (!startsWith( s, offset ))
                return false;

            offset += s.length();

            advance( offset );

            return true;
        }

        private boolean tokenize ( String s1, String s2 )
        {
            assert s1.length() > 0;
            assert s2.length() > 0;
            
            int offset = 0;

            while ( isWhitespace( offset ) )
                offset++;
            
            if (!startsWith( s1, offset ))
                return false;

            offset += s1.length();

            while ( isWhitespace( offset ) )
                offset++;
            
            if (!startsWith( s2, offset ))
                return false;
                
            offset += s2.length();

            advance( offset );

            return true;
        }

        private boolean tokenize ( String s1, String s2, String s3)
        {
            assert s1.length() > 0;
            assert s2.length() > 0;
            assert s3.length() > 0;

            int offset = 0;

            while ( isWhitespace( offset ) )
                offset++;
            
            if (!startsWith( s1, offset ))
                return false;

            offset += s1.length();

            while ( isWhitespace( offset ) )
                offset++;
            
            if (!startsWith( s2, offset ))
                return false;
                
            offset += s2.length();

            while ( isWhitespace( offset ) )
                offset++;
            
            if (!startsWith( s3, offset ))
                return false;
            
            offset += s3.length();

             while ( isWhitespace( offset ) )
                offset++;

            advance( offset );

            return true;
        }
        private boolean tokenize ( String s1, String s2, String s3,String s4) {
            assert s1.length() > 0;
            assert s2.length() > 0;
            assert s3.length() > 0;
            assert s4.length() > 0;

            int offset = 0;

            while ( isWhitespace( offset ) )
                offset++;

            if (!startsWith( s1, offset ))
                return false;

            offset += s1.length();

            while ( isWhitespace( offset ) )
                offset++;

            if (!startsWith( s2, offset ))
                return false;

            offset += s2.length();

            while ( isWhitespace( offset ) )
                offset++;

            if (!startsWith( s3, offset ))
                return false;

            offset += s3.length();

             while ( isWhitespace( offset ) )
                offset++;

            if (!startsWith( s4, offset ))
                return false;

            offset += s4.length();

            advance( offset );

            return true;
        }


        private String tokenizeNCName ( ) throws XPathCompileException
        {
            parseWhitespace();
            
            if (!isNCNameStart())
                throw newError( "Expected non-colonized name" );

            StringBuffer sb = new StringBuffer();

            sb.append( (char) currChar() );

            for ( advance() ; isNCName() ; advance() )
                sb.append( (char) currChar() );

            return sb.toString();
        }

        private QName getAnyQName ( )
        {
            return new QName( "", "" );
        }
        
        private QName tokenizeQName ( ) throws XPathCompileException
        {
            if (tokenize( "*" ))
                return getAnyQName();
            
            String ncName = tokenizeNCName();

            if (!tokenize( ":" ))
                return new QName( lookupPrefix( "" ), ncName );
            
            return
                new QName(
                    lookupPrefix( ncName ),
                    tokenize( "*" ) ? "" : tokenizeNCName() );
        }
        
        private String tokenizeQuotedUri ( ) throws XPathCompileException
        {
            char quote;
            
            if (tokenize( "\"" ))
                quote = '"';
            else  if (tokenize( "'" ))
                quote = '\'';
            else
                throw newError( "Expected quote (\" or ')" );

            StringBuffer sb = new StringBuffer();

            for ( ; ; )
            {
                if (currChar() == -1)
                    throw newError( "Path terminated in URI literal" );
                
                if (currChar() == quote)
                {
                    advance();
                    
                    if (currChar() != quote)
                        break;
                }
                
                sb.append( (char) currChar() );
                
                advance();
            }

            return sb.toString();
        }

        private Step addStep ( boolean deep, boolean attr, QName name, Step steps )
        {
            Step step = new Step( deep, attr, name );

            if (steps == null)
                return step;

            Step s = steps;
            
            while ( steps._next != null )
                steps = steps._next;

            steps._next = step;
            step._prev = steps;

            return s;
        }

        private Step tokenizeSteps ( ) throws XPathCompileException
        {
            if (tokenize( "/" ))
                throw newError( "Absolute paths unsupported" );

            boolean deep;

            if (tokenize( "$", _currentNodeVar, "//" ) || tokenize( ".", "//" ))
                deep = true;
            else if (tokenize( "$", _currentNodeVar, "/" ) || tokenize( ".", "/" ))
                deep = false;
            else if (tokenize( "$", _currentNodeVar ) || tokenize( "." ))
                return addStep( false, false, null, null );
            else
                deep = false;

            Step steps = null;

            // Compile the steps removing /. and mergind //. with the next step

            boolean deepDot = false;

            for ( ; ; )
            {
                if (tokenize( "attribute", "::" ) || tokenize( "@" ))
                {
                    steps = addStep( deep, true, tokenizeQName(), steps );
                    break;
                }

                QName name;
                
                if (tokenize( "." ))
                    deepDot = deepDot || deep;
                else
                {
                    tokenize( "child", "::" );
                    if ((name = tokenizeQName()) != null)
                    {
                        steps = addStep( deep, false, name, steps );
                        deep = false; // only this step needs to be deep
                        // other folowing steps will be deep only if they are preceded by // wildcard
                    }
                }

                if (tokenize( "//" ))
                {
                    deep = true;
                    deepDot = false;
                }
                else if (tokenize( "/" ))
                {
                    if (deepDot)
                        deep = true;
                }
                else
                    break;
            }

            // If there was a //. at the end of th path, then we need to make
            // two paths, one with * at the end and another with @* at the end.

            if ((_lastDeepDot = deepDot))
            {
                _lastDeepDot = true;
                steps = addStep( true, false, getAnyQName(), steps );
            }

            // Add sentinal step (_name == null)
            
            return addStep( false, false, null, steps );
        }

        private void computeBacktrack ( Step steps )
            throws XPathCompileException
        {
            //
            // Compute static backtrack information
            //
            // Note that I use the fact that _hasBacktrack is initialized to
            // false and _backtrack to null in the following code.
            //

            Step s, t;
            
            for ( s = steps ; s != null ; s = t )
            {
                // Compute the segment from [ s, t )
                
                for ( t = s._next ; t != null && !t._deep ; )
                    t = t._next;

                // If the segment is NOT rooted at //, then the backtrack is
                // null for the entire segment, including possible attr and/or
                // sentinal

                if (!s._deep)
                {
                    for ( Step u = s ; u != t ; u = u._next )
                        u._hasBacktrack = true;

                    continue;
                }

                // Compute the sequence [ s, u ) of length n which contain no
                // wild steps.

                int n = 0;
                Step u = s;

                while ( u != t && u._name != null && !u.isWild() && !u._attr )
                {
                    n++;
                    u = u._next;
                }

                // Now, apply KMP to [ s, u ) for fast backtracking

                QName [] pattern = new QName [ n + 1 ];
                int [] kmp = new int [ n + 1 ];

                Step v = s;
                
                for ( int i = 0 ; i < n ; i++ )
                {
                    pattern[ i ] = v._name;
                    v = v._next;
                }

                pattern[ n ] = getAnyQName();

                int i = 0;
                int j = kmp[ 0 ] = -1;

                while ( i < n )
                {
                    while ( j > -1 && !pattern[ i ].equals( pattern[ j ] ) )
                        j = kmp[ j ];

                    if (pattern[ ++i ].equals( pattern[ ++j ] ))
                        kmp[ i ] = kmp[ j ];
                    else
                        kmp[ i ] = j;
                }

                i = 0;
                
                for ( v = s ; v != u ; v = v._next )
                {
                    v._hasBacktrack = true;
                    v._backtrack = s;
                    
                    for ( j = kmp[ i ] ; j > 0 ; j-- )
                        v._backtrack = v._backtrack._next;
                    
                    i++;
                }

                // Compute the success backtrack and stuff it into an attr and
                // sentinal if they exist for this segment
                
                v = s;

                if (n > 1)
                {
                    for ( j = kmp[ n - 1 ] ; j > 0 ; j-- )
                        v = v._next;
                }

                if (u != t && u._attr)
                {
                    u._hasBacktrack = true;
                    u._backtrack = v;
                    u = u._next;
                }

                if (u != t && u._name == null)
                {
                    u._hasBacktrack = true;
                    u._backtrack = v;
                }

                // The first part of a deep segment always backtracks to itself
                
                assert s._deep;

                s._hasBacktrack = true;
                s._backtrack = s;
            }
        }

        private void tokenizePath ( ArrayList paths )
            throws XPathCompileException
        {
            _lastDeepDot = false;
            
            Step steps = tokenizeSteps();
            
            computeBacktrack( steps );

            paths.add( steps );

            // If the last path ended in //., that path will match all
            // elements, here I make a path which matches all attributes.

            if (_lastDeepDot)
            {
                _sawDeepDot = true;
                
                Step s = null;

                for ( Step t = steps ; t != null ; t = t._next )
                {
                    if (t._next != null && t._next._next == null)
                        s = addStep( t._deep, true, t._name, s );
                    else
                        s = addStep( t._deep, t._attr, t._name, s );
                }

                computeBacktrack( s );

                paths.add( s );
            }
        }
        
        private Selector tokenizeSelector ( ) throws XPathCompileException
        {
            ArrayList paths = new ArrayList();

            tokenizePath( paths );

            while ( tokenize( "|" ) )
                tokenizePath( paths );

            return new Selector( (Step[]) paths.toArray( new Step [ 0 ] ) );
        }

        private XPath tokenizeXPath ( ) throws XPathCompileException
        {
            for ( ; ; )
            {
                if (tokenize( "declare", "namespace" ))
                {
                    if (!parseWhitespace())
                        throw newError( "Expected prefix after 'declare namespace'" );

                    String prefix = tokenizeNCName();

                    if (!tokenize( "=" ))
                        throw newError( "Expected '='" );

                    String uri = tokenizeQuotedUri();
                    
                    if (_namespaces.containsKey( prefix ))
                    {
                        throw newError(
                            "Redefinition of namespace prefix: " + prefix );
                    }

                    _namespaces.put( prefix, uri );

                    //return these to saxon:? Is it an error to pass external NS
                    //that conflicts? or should we just override it?
                    if (_externalNamespaces.containsKey( prefix ))
                    {
                        throw newError(
                            "Redefinition of namespace prefix: " + prefix );
                    }
                    _externalNamespaces.put( prefix, uri );

                    if (! tokenize( ";" ))
                    {
//			            throw newError(
//                            "Namespace declaration must end with ;" );
			        }

                    _externalNamespaces.put(_NS_BOUNDARY,new Integer(_offset));

                    continue;
                }
                
                if (tokenize( "declare","default", "element", "namespace" ))
                {
                    String uri = tokenizeQuotedUri();
                    
                    if (_namespaces.containsKey( "" ))
                    {
                        throw newError(
                            "Redefinition of default element namespace" );
                    }

                    _namespaces.put( "", uri );

                    //return these to saxon:? Is it an error to pass external NS
                    //that conflicts? or should we just override it?
                    if (_externalNamespaces.containsKey( XPath._DEFAULT_ELT_NS ))
                    {
                         throw newError("Redefinition of default element namespace : ");
                    }
                    _externalNamespaces.put( XPath._DEFAULT_ELT_NS, uri );

                    if (! tokenize( ";" ))
                        throw newError("Default Namespace declaration must end with ;" );
                    //the boundary is the last ; in the prolog...
                    _externalNamespaces.put(_NS_BOUNDARY,new Integer(_offset));

                    continue;
                }
                
                break;
            }

            // Add the default prefix mapping if it has not been redefined
            
            if (!_namespaces.containsKey( "" ))
                _namespaces.put( "", "" );

            Selector selector = tokenizeSelector();

            parseWhitespace();
            
            if (currChar() != -1)
            {
                throw newError(
                    "Unexpected char '" + (char) currChar() + "'" );
            }

            return new XPath( selector, _sawDeepDot );
        }

        //split of prolog decls that are not standard XPath syntax
        //but work in v1
        private void processNonXpathDecls(){

        }

        private String _expr;

        private boolean _sawDeepDot;  // Saw one overall
        private boolean _lastDeepDot;

        private String _currentNodeVar;
        
       // private Map _namespaces;
        protected Map _namespaces;
        private Map _externalNamespaces;
        
        private int _offset;
        private int _line;
        private int _column;
    }

    private static final class Step
    {
        Step ( boolean deep, boolean attr, QName name )
        {
            _name = name;

            _deep = deep;
            _attr = attr;

            int flags = 0;

            if (_deep || !_attr)
                flags |= ExecutionContext.DESCEND;

            if (_attr)
                flags |= ExecutionContext.ATTRS;

            _flags = flags;
        }

        boolean isWild ( )
        {
            return _name.getLocalPart().length() == 0;
        }

        boolean match ( QName name )
        {
            String local = _name.getLocalPart();
            String nameLocal = name.getLocalPart();
            String uri;
            String nameUri;

            int localLength = local.length();
            int uriLength;

            // match any name to _name when _name is empty ""@""
            if (localLength==0)
            {
                uri = _name.getNamespaceURI();
                uriLength = uri.length();

                if (uriLength==0)
                    return true;

                return uri.equals(name.getNamespaceURI());
            }

            if (localLength!=nameLocal.length())
                return false;

            uri = _name.getNamespaceURI();
            nameUri = name.getNamespaceURI();

            if (uri.length()!=nameUri.length())
                return false;

            return local.equals(nameLocal) && uri.equals(nameUri);
        }

        final boolean _attr;
        final boolean _deep;

        int _flags;
        
        final QName _name;

        Step _next, _prev;

        boolean _hasBacktrack;
        Step    _backtrack;
    }

    private static final class Selector
    {
        Selector ( Step[] paths )
        {
            _paths = paths;
        }

        final Step[] _paths;
    }

    //
    //
    //
    
    private XPath ( Selector selector, boolean sawDeepDot )
    {
        _selector = selector;
        _sawDeepDot = sawDeepDot;
    }

    public boolean sawDeepDot ( )
    {
        return _sawDeepDot;
    }

    public static final String _NS_BOUNDARY = "$xmlbeans!ns_boundary";
    public static final String _DEFAULT_ELT_NS = "$xmlbeans!default_uri";
    private final Selector _selector;
    private final boolean  _sawDeepDot;
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy