All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.xmlbeans.impl.store.CharUtil Maven / Gradle / Ivy

There is a newer version: 4.0.115
Show newest version
/*   Copyright 2004 The Apache Software Foundation
 *
 *   Licensed under the Apache License, Version 2.0 (the "License");
 *   you may not use this file except in compliance with the License.
 *   You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 *   Unless required by applicable law or agreed to in writing, software
 *   distributed under the License is distributed on an "AS IS" BASIS,
 *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *   See the License for the specific language governing permissions and
 *   limitations under the License.
 */
package org.apache.xmlbeans.impl.store;

import java.io.PrintStream;
import java.lang.ref.SoftReference;

/**
 * Helpers for working with "text triples": a text source object together with an offset and
 * a character count into that source.
 *
 * <p>A text source is one of: {@code null} (only valid with offset 0 and count 0), a
 * {@link String}, a {@code char[]}, or a {@link CharJoin} (the concatenation of two other
 * sources).  Methods which produce a new triple return the source object and report the
 * matching offset and count through the public fields {@link #_offSrc} and {@link #_cchSrc}.
 *
 * <p>An instance carries mutable, unsynchronized state (the result fields, a shared
 * {@link CharIterator} and the current save buffer), so a single instance must not be used
 * by several threads at once; {@link #getThreadLocalCharUtil()} hands out one instance per
 * thread.
 */
public final class CharUtil
{
    /**
     * Creates a utility which allocates its save buffers in chunks of at least
     * {@code charBufSize} characters.
     *
     * @param charBufSize minimum size of each internally allocated {@code char[]} buffer
     */
    public CharUtil ( int charBufSize )
    {
        _charBufSize = charBufSize;
    }

    /**
     * Re-initializes and returns this instance's single shared iterator, positioned at the
     * start of the given text.  A later call invalidates the previously returned iteration.
     *
     * @param src text source
     * @param off offset of the text in {@code src}
     * @param cch number of characters
     * @return the shared iterator
     */
    public CharIterator getCharIterator ( Object src, int off, int cch )
    {
        _charIter.init( src, off, cch );
        return _charIter;
    }

    /**
     * Same as {@link #getCharIterator(Object, int, int)} but positions the iterator at
     * {@code start} (a position in the range {@code 0..cch}).
     */
    public CharIterator getCharIterator ( Object src, int off, int cch, int start )
    {
        _charIter.init( src, off, cch, start );
        return _charIter;
    }

    /**
     * Returns the calling thread's {@code CharUtil}.  The instance is only softly referenced
     * from the thread local, so it is re-created here if it has been cleared.
     *
     * @return a non-null per-thread instance
     */
    public static CharUtil getThreadLocalCharUtil ( )
    {
        SoftReference<CharUtil> softRef = tl_charUtil.get();
        CharUtil charUtil = softRef.get();

        if (charUtil == null)
        {
            charUtil = new CharUtil( CHARUTIL_INITIAL_BUFSIZE );
            tl_charUtil.set( new SoftReference<CharUtil>( charUtil ) );
        }

        return charUtil;
    }

    /**
     * Appends the text described by the triple to {@code sb}.
     *
     * @param sb  destination buffer
     * @param src text source
     * @param off offset of the text in {@code src}
     * @param cch number of characters to append; nothing is appended when 0
     */
    public static void getString ( StringBuffer sb, Object src, int off, int cch )
    {
        assert isValid( src, off, cch );

        if (cch == 0)
            return;

        if (src instanceof char[])
            sb.append( (char[]) src, off, cch );
        else if (src instanceof String)
        {
            String s = (String) src;

            // Avoid creating a substring when the whole string is wanted
            if (off == 0 && cch == s.length())
                sb.append( s );
            else
                sb.append( s.substring( off, off + cch ) );
        }
        else
            ((CharJoin) src).getString( sb, off, cch );
    }

    /**
     * Copies the text described by the triple into {@code chars}, beginning at index
     * {@code start}.
     *
     * @param chars destination array
     * @param start index in {@code chars} of the first character written
     * @param src   text source
     * @param off   offset of the text in {@code src}
     * @param cch   number of characters to copy; nothing is copied when 0
     */
    public static void getChars ( char[] chars, int start, Object src, int off, int cch )
    {
        assert isValid( src, off, cch );
        assert chars != null && start >= 0 && start <= chars.length;

        if (cch == 0)
            return;

        if (src instanceof char[])
            System.arraycopy( (char[]) src, off, chars, start, cch );
        else if (src instanceof String)
            ((String) src).getChars( off, off + cch, chars, start );
        else
            ((CharJoin) src).getChars( chars, start, off, cch );
    }

    /**
     * Returns the text described by the triple as a {@link String}.
     *
     * @param src text source
     * @param off offset of the text in {@code src}
     * @param cch number of characters
     * @return the text; the empty string when {@code cch} is 0
     */
    public static String getString ( Object src, int off, int cch )
    {
        assert isValid( src, off, cch );

        if (cch == 0)
            return "";

        if (src instanceof char[])
            return new String( (char[]) src, off, cch );

        if (src instanceof String)
        {
            String s = (String) src;

            if (off == 0 && cch == s.length())
                return s;

            return s.substring( off, off + cch );
        }

        StringBuffer sb = new StringBuffer();

        ((CharJoin) src).getString( sb, off, cch );

        return sb.toString();
    }

    /**
     * Tells whether {@code ch} is a space, tab, line feed or carriage return.
     */
    public static final boolean isWhiteSpace ( char ch )
    {
        switch ( ch )
        {
            case ' ': case '\t': case '\n': case '\r': return true;
            default                                  : return false;
        }
    }

    /**
     * Tells whether every character of the text is white space as defined by
     * {@link #isWhiteSpace(char)}.  Empty text counts as white space.
     *
     * <p>For {@link CharJoin} sources this uses the shared iterator.
     */
    public final boolean isWhiteSpace ( Object src, int off, int cch )
    {
        assert isValid( src, off, cch );

        if (cch <= 0)
            return true;

        if (src instanceof char[])
        {
            for ( char[] chars = (char[]) src ; cch > 0 ; cch-- )
                if (!isWhiteSpace( chars[ off++ ] ))
                    return false;

            return true;
        }

        if (src instanceof String)
        {
            for ( String s = (String) src ; cch > 0 ; cch-- )
                if (!isWhiteSpace( s.charAt( off++ ) ))
                    return false;

            return true;
        }

        boolean isWhite = true;

        for ( _charIter.init( src, off, cch ) ; _charIter.hasNext() ; )
        {
            if (!isWhiteSpace( _charIter.next() ))
            {
                isWhite = false;
                break;
            }
        }

        _charIter.release();

        return isWhite;
    }

    /**
     * Removes leading white space from the text.
     *
     * @return {@code src}, with the stripped offset and count in {@link #_offSrc} and
     *         {@link #_cchSrc}; or {@code null} (with both fields 0) when nothing remains
     */
    public Object stripLeft ( Object src, int off, int cch )
    {
        assert isValid( src, off, cch );

        if (cch > 0)
        {
            if (src instanceof char[])
            {
                char[] chars = (char[]) src;

                while ( cch > 0 && isWhiteSpace( chars[ off ] ) )
                    { cch--; off++; }
            }
            else if (src instanceof String)
            {
                String s = (String) src;

                while ( cch > 0 && isWhiteSpace( s.charAt( off ) ) )
                    { cch--; off++; }
            }
            else
            {
                int count = 0;

                // count is only incremented for characters which were white space, because
                // the break skips the loop's update expression.
                for ( _charIter.init( src, off, cch ) ; _charIter.hasNext() ; count++ )
                    if (!isWhiteSpace( _charIter.next() ))
                        break;

                _charIter.release();

                // Skipping characters must shrink the count as well as advance the offset,
                // exactly as the char[] and String branches do; otherwise the resulting
                // triple would extend past the end of the text.
                off += count;
                cch -= count;
            }
        }

        if (cch == 0)
        {
            _offSrc = 0;
            _cchSrc = 0;

            return null;
        }

        _offSrc = off;
        _cchSrc = cch;

        return src;
    }

    /**
     * Removes trailing white space from the text.
     *
     * @return {@code src}, with the offset and the reduced count in {@link #_offSrc} and
     *         {@link #_cchSrc}; or {@code null} (with both fields 0) when nothing remains
     */
    public Object stripRight ( Object src, int off, int cch )
    {
        assert isValid( src, off, cch );

        if (cch > 0)
        {
            // Walk backwards from the end, dropping one char from the count per white space
            for ( _charIter.init( src, off, cch, cch ) ; _charIter.hasPrev() ; cch-- )
                if (!isWhiteSpace( _charIter.prev() ))
                    break;

            _charIter.release();
        }

        if (cch == 0)
        {
            _offSrc = 0;
            _cchSrc = 0;

            return null;
        }

        _offSrc = off;
        _cchSrc = cch;

        return src;
    }

    /**
     * Produces the text obtained by inserting one text into another.
     *
     * @param posInsert position within the target text (0..{@code cch}) to insert at
     * @param src       target text source
     * @param off       offset of the target text
     * @param cch       length of the target text
     * @param srcInsert source of the text to insert
     * @param offInsert offset of the text to insert
     * @param cchInsert length of the text to insert
     * @return the source of the combined text; its offset and count are left in
     *         {@link #_offSrc} and {@link #_cchSrc}
     */
    public Object insertChars (
        int posInsert,
        Object src, int off, int cch,
        Object srcInsert, int offInsert, int cchInsert )
    {
        assert isValid( src, off, cch );
        assert isValid( srcInsert, offInsert, cchInsert );
        assert posInsert >= 0 && posInsert <= cch;

        // TODO - at some point, instead of creating joins, I should
        // normalize all the text into a single buffer to stop large
        // trees from being built when many modifications happen...

        // TODO - actually, I should see if the size of the new char
        // sequence is small enough to simply allocate a new contiguous
        // sequence, either in a common char[] managed by the master,
        // or just create a new string ... this goes for remove chars
        // as well.

        if (cchInsert == 0)
        {
            _cchSrc = cch;
            _offSrc = off;
            return src;
        }

        if (cch == 0)
        {
            _cchSrc = cchInsert;
            _offSrc = offInsert;
            return srcInsert;
        }

        _cchSrc = cch + cchInsert;

        Object newSrc;

        if (_cchSrc <= MAX_COPY && canAllocate( _cchSrc ))
        {
            // Small result which fits in the current buffer: copy the three pieces.
            // allocate() sets _offSrc to where the copy must start.
            char[] c = allocate( _cchSrc );

            getChars( c, _offSrc, src, off, posInsert );
            getChars( c, _offSrc + posInsert, srcInsert, offInsert, cchInsert );
            getChars( c, _offSrc + posInsert + cchInsert, src, off + posInsert, cch - posInsert );

            newSrc = c;
        }
        else
        {
            _offSrc = 0;

            CharJoin newJoin;

            if (posInsert == 0)
                newJoin = new CharJoin( srcInsert, offInsert, cchInsert, src, off );
            else if (posInsert == cch)
                newJoin = new CharJoin( src, off, cch, srcInsert, offInsert );
            else
            {
                CharJoin j = new CharJoin( src, off, posInsert, srcInsert, offInsert );
                newJoin = new CharJoin( j, 0, posInsert + cchInsert, src, off + posInsert );
            }

            // Flatten joins which have grown too deep; saveChars updates _offSrc/_cchSrc
            if (newJoin._depth > CharJoin.MAX_DEPTH)
                newSrc = saveChars( newJoin, _offSrc, _cchSrc );
            else
                newSrc = newJoin;
        }

        assert isValid( newSrc, _offSrc, _cchSrc );

        return newSrc;
    }

    /**
     * Produces the text obtained by removing a range of characters from a text.
     *
     * @param posRemove position within the text (0..{@code cch}) of the first char removed
     * @param cchRemove number of characters to remove
     * @param src       text source
     * @param off       offset of the text
     * @param cch       length of the text
     * @return the source of the remaining text ({@code null} if nothing remains); its
     *         offset and count are left in {@link #_offSrc} and {@link #_cchSrc}
     */
    public Object removeChars ( int posRemove, int cchRemove, Object src, int off, int cch )
    {
        assert isValid( src, off, cch );
        assert posRemove >= 0 && posRemove <= cch;
        assert cchRemove >= 0 && posRemove + cchRemove <= cch;

        Object newSrc;

        _cchSrc = cch - cchRemove;

        if (_cchSrc == 0)
        {
            newSrc = null;
            _offSrc = 0;
        }
        else if (posRemove == 0)
        {
            // Removing a prefix: just move the offset forward
            newSrc = src;
            _offSrc = off + cchRemove;
        }
        else if (posRemove + cchRemove == cch)
        {
            // Removing a suffix: the shorter count is all that is needed
            newSrc = src;
            _offSrc = off;
        }
        else
        {
            int cchAfter = cch - cchRemove;

            if (cchAfter <= MAX_COPY && canAllocate( cchAfter ))
            {
                // allocate() leaves the start of the new region in _offSrc
                char[] chars = allocate( cchAfter );

                getChars( chars, _offSrc, src, off, posRemove );

                getChars(
                    chars, _offSrc + posRemove,
                    src, off + posRemove + cchRemove, cch - posRemove - cchRemove );

                newSrc = chars;
            }
            else
            {
                CharJoin j = new CharJoin( src, off, posRemove, src, off + posRemove + cchRemove );

                if (j._depth > CharJoin.MAX_DEPTH)
                    newSrc = saveChars( j, 0, _cchSrc );
                else
                {
                    newSrc = j;
                    _offSrc = 0;
                }
            }
        }

        assert isValid( newSrc, _offSrc, _cchSrc );

        return newSrc;
    }

    /**
     * Returns the total length of a leaf source ({@code null}, String or char[]).
     */
    private static int sizeof ( Object src )
    {
        assert src == null || src instanceof String || src instanceof char[];

        if (src instanceof char[])
            return ((char[]) src).length;

        return src == null ? 0 : ((String) src).length();
    }

    /**
     * Tells whether {@link #allocate(int)} can hand out {@code cch} contiguous chars, either
     * from the remainder of the current buffer or (when there is none) from a new buffer.
     */
    private boolean canAllocate ( int cch )
    {
        return _currentBuffer == null || _currentBuffer.length - _currentOffset >= cch;
    }

    /**
     * Reserves up to {@code cch} chars in the current buffer, creating a buffer first if
     * there is none.  The start of the reserved region is left in {@link #_offSrc} and its
     * length in {@link #_cchSrc}; the length is smaller than {@code cch} when the current
     * buffer does not have that much room left.
     *
     * @return the buffer containing the reserved region
     */
    private char[] allocate ( int cch )
    {
        assert _currentBuffer == null || _currentBuffer.length - _currentOffset > 0;

        if (_currentBuffer == null)
        {
            _currentBuffer = new char [ Math.max( cch, _charBufSize ) ];
            _currentOffset = 0;
        }

        _offSrc = _currentOffset;
        _cchSrc = Math.min( _currentBuffer.length - _currentOffset, cch );

        char[] retBuf = _currentBuffer;

        assert _currentOffset + _cchSrc <= _currentBuffer.length;

        // Drop a buffer once it is full so the next allocation starts a new one
        if ((_currentOffset += _cchSrc) == _currentBuffer.length)
        {
            _currentBuffer = null;
            _currentOffset = 0;
        }

        return retBuf;
    }

    /**
     * Copies the given text into storage owned by this instance.
     *
     * @return the source of the saved text; its offset and count are left in
     *         {@link #_offSrc} and {@link #_cchSrc}
     */
    public Object saveChars ( Object srcSave, int offSave, int cchSave )
    {
        return saveChars( srcSave, offSave, cchSave, null, 0, 0 );
    }

    /**
     * Copies the given text into storage owned by this instance and prepends previously
     * saved text to it.
     *
     * @param srcSave source of the text to save
     * @param offSave offset of the text to save
     * @param cchSave length of the text to save
     * @param srcPrev source of the text to prepend (may be {@code null} with count 0)
     * @param offPrev offset of the text to prepend
     * @param cchPrev length of the text to prepend
     * @return the source of the combined text; its offset and count are left in
     *         {@link #_offSrc} and {@link #_cchSrc}
     */
    public Object saveChars (
        Object srcSave, int offSave, int cchSave,
        Object srcPrev, int offPrev, int cchPrev )
    {
        // BUGBUG (ericvas)
        //
        // There is a severe degenerate situation which can develop here.  The case is where
        // there is a long string of calls to saveChars, where the caller passes in prev text
        // to be prepended.  In this case, the buffer breaks and a join is made, but because the
        // join is created, subsequent calls will produce additional joins.  I need to figure
        // out a way that a whole bunch of joins are not created.  I really only want to create
        // joins in situations where large amount of text is manipulated.

        assert isValid( srcSave, offSave, cchSave );
        assert isValid( srcPrev, offPrev, cchPrev );

        // Allocate some space to save the text and copy it there.  This may not allocate all
        // the space I need.  This happens when I run out of buffer space.  Deal with this later.

        char[] srcAlloc = allocate( cchSave );
        int offAlloc = _offSrc;
        int cchAlloc = _cchSrc;

        assert cchAlloc <= cchSave;

        getChars( srcAlloc, offAlloc, srcSave, offSave, cchAlloc );

        Object srcNew;
        int offNew;

        int cchNew = cchAlloc + cchPrev;

        // The prev arguments specify a chunk of text which the caller wants prepended to the
        // text to be saved.  The optimization here is to detect the case where the prev text
        // and the newly allocated and saved text are adjacent, so that I can avoid copying
        // or joining the two pieces.  The situation where this happens most is when a parser
        // reports a big piece of text in chunks, perhaps because there are entities in the
        // big chunk of text.

        CharJoin j;

        if (cchPrev == 0)
        {
            srcNew = srcAlloc;
            offNew = offAlloc;
        }
        else if (srcPrev == srcAlloc && offPrev + cchPrev == offAlloc)
        {
            assert srcPrev instanceof char[];

            srcNew = srcPrev;
            offNew = offPrev;
        }
        else if (srcPrev instanceof CharJoin && (j = (CharJoin) srcPrev)._srcRight == srcAlloc &&
                    offPrev + cchPrev - j._cchLeft + j._offRight == offAlloc)
        {
            assert j._srcRight instanceof char[];

            srcNew = srcPrev;
            offNew = offPrev;
        }
        else
        {
            j = new CharJoin( srcPrev, offPrev, cchPrev, srcAlloc, offAlloc );

            if (j._depth > CharJoin.MAX_DEPTH)
            {
                // Flatten the over-deep join into the buffer.  The flattened copy starts
                // wherever the recursive call put it, which it reports through _offSrc, so
                // that offset (not 0) must be carried forward.
                srcNew = saveChars( j, 0, cchNew );
                offNew = _offSrc;
            }
            else
            {
                srcNew = j;
                offNew = 0;
            }
        }

        // Now, srcNew, offNew and cchNew describe the prev text followed by the part of the
        // text to save which fit in the first allocation.  cchMore is whatever did not fit.

        int cchMore = cchSave - cchAlloc;

        if (cchMore > 0)
        {
            // Save the remainder with everything gathered so far as its prefix.  Normally
            // the buffer has just been consumed, so the recursive call allocates a fresh
            // buffer big enough for the whole remainder and joins it to the prefix.  If the
            // prefix was flattened above, the current buffer may instead be partly used;
            // going through saveChars again handles that too (including another partial
            // allocation) rather than assuming a fresh buffer.

            return saveChars( srcSave, offSave + cchAlloc, cchMore, srcNew, offNew, cchNew );
        }

        _offSrc = offNew;
        _cchSrc = cchNew;

        assert isValid( srcNew, _offSrc, _cchSrc );

        return srcNew;
    }

    /**
     * Prints {@code s} in double quotes with control characters and quotes escaped.  Only
     * the first 36 characters are printed, followed by "..." when there are more.
     */
    private static void dumpText ( PrintStream o, String s )
    {
        o.print( "\"" );

        for ( int i = 0 ; i < s.length() ; i++ )
        {
            char ch = s.charAt( i );

            if (i == 36)
            {
                o.print( "..." );
                break;
            }

            if      (ch == '\n') o.print( "\\n" );
            else if (ch == '\r') o.print( "\\r" );
            else if (ch == '\t') o.print( "\\t" );
            else if (ch == '\f') o.print( "\\f" );
            else if (ch == '"' ) o.print( "\\\"" );
            else                 o.print( ch );
        }

        o.print( "\"" );
    }

    /**
     * Debugging aid: writes a description of the triple, followed by a line break, to
     * {@code System.out}.
     */
    public static void dump ( Object src, int off, int cch )
    {
        dumpChars( System.out, src, off, cch );
        System.out.println();
    }

    /**
     * Debugging aid: writes the offset, the count, the kind of source and (an abbreviated
     * form of) the text to {@code p}.  Prints " (Error)" instead of the text when the range
     * lies outside a String or char[] source.
     */
    public static void dumpChars ( PrintStream p, Object src, int off, int cch )
    {
        p.print( "off=" + off + ", cch=" + cch + ", " );

        if (src == null)
            p.print( "" );   // nothing further is printed for a null source
        else if (src instanceof String)
        {
            String s = (String) src;

            p.print( "String" );

            if (off != 0 || cch != s.length())
            {
                if (off < 0 || off > s.length() || off + cch < 0 || off + cch > s.length())
                {
                    p.print( " (Error)" );
                    return;
                }
            }

            //p.print( ": " );
            dumpText( p, s.substring( off, off + cch ) );
        }
        else if (src instanceof char[])
        {
            char[] chars = (char[]) src;

            p.print( "char[]" );

            if (off != 0 || cch != chars.length)
            {
                if (off < 0 || off > chars.length || off + cch < 0 || off + cch > chars.length)
                {
                    p.print( " (Error)" );
                    return;
                }
            }

            //p.print( ": " );
            dumpText( p, new String( chars, off, cch ) );
        }
        else if (src instanceof CharJoin)
        {
            p.print( "CharJoin" );

            ((CharJoin) src).dumpChars( p, off, cch );
        }
        else
        {
            p.print( "Unknown text source" );
        }
    }

    /**
     * Checks that a triple is well formed: non-negative offset and count, a recognized kind
     * of source, and a range which lies inside the source.  A {@code null} source is only
     * valid with offset 0 and count 0.  Used by this class in assertions.
     */
    public static boolean isValid ( Object src, int off, int cch )
    {
        if (cch < 0 || off < 0)
            return false;

        if (src == null)
            return off == 0 && cch == 0;

        if (src instanceof char[])
        {
            char[] c = (char[]) src;
            return off <= c.length && off + cch <= c.length;
        }

        if (src instanceof String)
        {
            String s = (String) src;
            return off <= s.length() && off + cch <= s.length();
        }

        if (src instanceof CharJoin)
            return ((CharJoin) src).isValid( off, cch );

        return false;
    }

    //
    // Text source made by joining two other text sources
    //

    /**
     * An immutable text source which is the concatenation of a left text (source, offset and
     * count) and a right text (source and offset; its length is implied by the count of the
     * triple which refers to the join).
     */
    public static final class CharJoin
    {
        /**
         * @param srcLeft  source of the left text
         * @param offLeft  offset of the left text in {@code srcLeft}
         * @param cchLeft  length of the left text
         * @param srcRight source of the right text
         * @param offRight offset of the right text in {@code srcRight}
         */
        public CharJoin (
            Object srcLeft, int offLeft, int cchLeft, Object srcRight, int offRight )
        {
            _srcLeft  = srcLeft;  _offLeft  = offLeft;  _cchLeft = cchLeft;
            _srcRight = srcRight; _offRight = offRight;

            // Depth is one more than the deeper of the two children (leaves count as 0)
            int depth = 0;

            if (srcLeft instanceof CharJoin)
                depth = ((CharJoin) srcLeft)._depth;

            if (srcRight instanceof CharJoin)
            {
                int rightDepth = ((CharJoin) srcRight)._depth;

                if (rightDepth > depth)
                    depth = rightDepth;
            }

            _depth = depth + 1;

            assert _depth <= MAX_DEPTH + 2;
        }

        /**
         * Number of chars of a (off, cch) range which lie beyond the left text, never
         * negative.
         */
        private int cchRight ( int off, int cch )
        {
            return Math.max( 0, cch - _cchLeft - off );
        }

        /**
         * Recomputes the depth of this join by walking the whole tree; used to cross-check
         * the cached {@link #_depth}.
         */
        public int depth ( )
        {
            int depth = 0;

            if (_srcLeft instanceof CharJoin)
                depth = ((CharJoin) _srcLeft).depth();

            if (_srcRight instanceof CharJoin)
                depth = Math.max( ((CharJoin)_srcRight).depth(), depth );

            return depth + 1;
        }

        /**
         * Validates the given range against this join.  Joins deeper than 2 are reported
         * as valid without being inspected.
         */
        public boolean isValid ( int off, int cch )
        {
            // Deep trees cause this to take forever

            if (_depth > 2)
                return true;

            assert _depth == depth();

            if (off < 0 || cch < 0)
                return false;

            if (!CharUtil.isValid( _srcLeft, _offLeft, _cchLeft ))
                return false;

            if (!CharUtil.isValid( _srcRight, _offRight, cchRight( off, cch ) ))
                return false;

            return true;
        }

        /**
         * Appends {@code cch} chars of this join, starting at position {@code off}, to
         * {@code sb}.
         */
        private void getString ( StringBuffer sb, int off, int cch )
        {
            assert cch > 0;

            if (off < _cchLeft)
            {
                // Range starts in the left text and may spill over into the right text
                int cchL = Math.min( _cchLeft - off, cch );

                CharUtil.getString( sb, _srcLeft, _offLeft + off, cchL );

                if (cch > cchL)
                    CharUtil.getString( sb, _srcRight, _offRight, cch - cchL );
            }
            else
                CharUtil.getString( sb, _srcRight, _offRight + off - _cchLeft, cch );
        }

        /**
         * Copies {@code cch} chars of this join, starting at position {@code off}, into
         * {@code chars} beginning at index {@code start}.
         */
        private void getChars ( char[] chars, int start, int off, int cch )
        {
            assert cch > 0;

            if (off < _cchLeft)
            {
                // Range starts in the left text and may spill over into the right text
                int cchL = Math.min( _cchLeft - off, cch );

                CharUtil.getChars( chars, start, _srcLeft, _offLeft + off, cchL );

                if (cch > cchL)
                    CharUtil.getChars( chars, start + cchL, _srcRight, _offRight, cch - cchL );
            }
            else
                CharUtil.getChars( chars, start, _srcRight, _offRight + off - _cchLeft, cch );
        }

        /** Debugging aid: dumps both halves of this join to {@code System.out}. */
        private void dumpChars( int off, int cch )
        {
            dumpChars( System.out, off, cch );
        }

        /** Debugging aid: dumps both halves of this join to {@code p}. */
        private void dumpChars( PrintStream p, int off, int cch )
        {
            p.print( "( " );
            CharUtil.dumpChars( p, _srcLeft, _offLeft, _cchLeft );
            p.print( ", " );
            CharUtil.dumpChars( p, _srcRight, _offRight, cchRight( off, cch ) );
            p.print( " )" );
        }

        //
        // Left text (source, offset, count), right text (source, offset) and cached depth
        //

        public final Object _srcLeft;
        public final int    _offLeft;
        public final int    _cchLeft;

        public final Object _srcRight;
        public final int    _offRight;

        public final int _depth;

        // Joins deeper than this are flattened by CharUtil via saveChars
        static final int MAX_DEPTH = 64;
    }

    //
    // Iterator over the characters of a triple
    //

    /**
     * Bidirectional cursor over the characters of a triple.  Positions run from 0 (before
     * the first char) to the count (after the last char).  The leaf (String or char[])
     * containing the current position is cached so that stepping within a leaf does not
     * walk the join tree again.
     */
    public final static class CharIterator
    {
        /** Starts iterating the given text at position 0. */
        public void init ( Object src, int off, int cch )
        {
            init( src, off, cch, 0 );
        }

        /** Starts iterating the given text at position {@code startPos} (0..{@code cch}). */
        public void init ( Object src, int off, int cch, int startPos )
        {
            assert isValid( src, off, cch );

            release();

            _srcRoot = src;
            _offRoot = off;
            _cchRoot = cch;

            // An empty cached range forces movePos to locate the leaf
            _minPos = _maxPos = -1;

            movePos( startPos );
        }

        /** Drops the references to the iterated text so it can be collected. */
        public void release ( )
        {
            _srcRoot = null;
            _srcLeafString = null;
            _srcLeafChars = null;
        }

        public boolean hasNext ( ) { return _pos < _cchRoot; }
        public boolean hasPrev ( ) { return _pos > 0;       }

        /** Returns the char at the current position and then advances by one. */
        public char next ( )
        {
            assert hasNext() ;

            char ch = currentChar();

            movePos( _pos + 1 );

            return ch;
        }

        /** Moves back by one position and returns the char at the new position. */
        public char prev ( )
        {
            assert hasPrev() ;

            movePos( _pos - 1 );

            return currentChar();
        }

        /**
         * Moves to {@code newPos} (0..count), re-locating the cached leaf when the new
         * position lies outside the range of positions covered by the current one.
         */
        public void movePos ( int newPos )
        {
            assert newPos >= 0 && newPos <= _cchRoot;

            if (newPos < _minPos || newPos > _maxPos)
            {
                // if newPos out of cached leaf, recache new leaf
                Object  src    = _srcRoot;
                int     off    = _offRoot + newPos;
                int     cch    = _cchRoot;

                // Descend through the joins, choosing the side which holds the position
                // and translating the offset and count into that side's terms
                for ( _offLeaf = _offRoot ; src instanceof CharJoin ; )
                {
                    CharJoin j = (CharJoin) src;

                    if (off < j._cchLeft)
                    {
                        src = j._srcLeft;
                        _offLeaf = j._offLeft;
                        off = off + j._offLeft;
                        cch = j._cchLeft;
                    }
                    else
                    {
                        src = j._srcRight;
                        _offLeaf = j._offRight;
                        off = off - (j._cchLeft - j._offRight);
                        cch = cch - j._cchLeft;
                    }
                }

//                _offLeaf = off - Math.min( off - _offLeaf, newPos );
                _minPos = newPos - (off - _offLeaf);
//                _maxPos = newPos + Math.min( _cchRoot - newPos, sizeof( src ) - off );
                _maxPos = _minPos + cch;

                if (newPos < _cchRoot)
                    _maxPos--;

                // Cache the leaf src to avoid instanceof for every char

                _srcLeafChars = null;
                _srcLeafString = null;

                if (src instanceof char[])
                    _srcLeafChars = (char[]) src;
                else
                    _srcLeafString = (String) src;

                assert newPos >= _minPos && newPos <= _maxPos;
            }

            _pos = newPos;
        }

        /** Reads the char at the current position from the cached leaf. */
        private char currentChar ( )
        {
            int i = _offLeaf + _pos - _minPos;

            return _srcLeafChars == null ? _srcLeafString.charAt( i ) : _srcLeafChars[ i ];
        }

        private Object _srcRoot; // Original triple
        private int    _offRoot;
        private int    _cchRoot;

        private int    _pos;     // Current position

        private int    _minPos;  // Min/max poses for current cached leaf
        private int    _maxPos;

        private int    _offLeaf;

        private String _srcLeafString;  // Cached leaf - either a char[] or a string
        private char[] _srcLeafChars;
    }

    // Buffer chunk size used for the per-thread instances
    private static final int CHARUTIL_INITIAL_BUFSIZE = 1024 * 32;

    // Per-thread instance, held softly so that its buffers can be reclaimed
    private static final ThreadLocal<SoftReference<CharUtil>> tl_charUtil =
        new ThreadLocal<SoftReference<CharUtil>>()
        {
            @Override
            protected SoftReference<CharUtil> initialValue ( )
            {
                return new SoftReference<CharUtil>( new CharUtil( CHARUTIL_INITIAL_BUFSIZE ) );
            }
        };

    /**
     * Removes the calling thread's {@code CharUtil} from the thread local.
     */
    public static void clearThreadLocals() {
        tl_charUtil.remove();
    }

    // Shared iterator, reused by the methods above and handed out by getCharIterator
    private CharIterator _charIter = new CharIterator();

    // Results of at most this many chars are copied rather than joined.
    // TODO - 64 is kinda arbitrary.  Perhaps it should be configurable.
    private static final int MAX_COPY = 64;

    // Current char buffer we're allocating new chars to

    private int    _charBufSize;
    private int    _currentOffset;
    private char[] _currentBuffer;

    // These members are used to communicate offset and character count
    // information back to a caller of various methods on CharUtil.
    // Usually, the methods returns the src Object, and these two hold
    // the offset and the char count.

    public int _offSrc;
    public int _cchSrc;
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy