All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.xmlbeans.impl.store.CharUtil Maven / Gradle / Ivy

The newest version!
/*   Copyright 2004 The Apache Software Foundation
 *
 *   Licensed under the Apache License, Version 2.0 (the "License");
 *   you may not use this file except in compliance with the License.
 *   You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 *   Unless required by applicable law or agreed to in writing, software
 *   distributed under the License is distributed on an "AS IS" BASIS,
 *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *   See the License for the specific language governing permissions and
 *  limitations under the License.
 */
package org.apache.xmlbeans.impl.store;

import java.io.PrintStream;
import java.lang.ref.SoftReference;

public final class CharUtil {
    public CharUtil(int charBufSize) {
        _charBufSize = charBufSize;
    }

    public CharIterator getCharIterator(Object src, int off, int cch) {
        _charIter.init(src, off, cch);
        return _charIter;
    }

    public CharIterator getCharIterator(Object src, int off, int cch, int start) {
        _charIter.init(src, off, cch, start);
        return _charIter;
    }

    public static CharUtil getThreadLocalCharUtil() {
        SoftReference softRef = tl_charUtil.get();
        CharUtil charUtil = softRef.get();
        if (charUtil == null) {
            charUtil = new CharUtil(CHARUTIL_INITIAL_BUFSIZE);
            tl_charUtil.set(new SoftReference<>(charUtil));
        }
        return charUtil;
    }

    public static void getString(StringBuffer sb, Object src, int off, int cch) {
        assert isValid(src, off, cch);

        if (cch == 0) {
            return;
        }

        if (src instanceof char[]) {
            sb.append((char[]) src, off, cch);
        } else if (src instanceof String) {
            String s = (String) src;

            if (off == 0 && cch == s.length()) {
                sb.append(s);
            } else {
                sb.append(s, off, off + cch);
            }
        } else {
            ((CharJoin) src).getString(sb, off, cch);
        }
    }

    public static void getChars(char[] chars, int start, Object src, int off, int cch) {
        assert isValid(src, off, cch);
        assert chars != null && start >= 0 && start <= chars.length;

        if (cch == 0) {
            return;
        }

        if (src instanceof char[]) {
            char[] cs = (char[]) src;
            System.arraycopy(cs, off, chars, start, cch);
        } else if (src instanceof String) {
            ((String) src).getChars(off, off + cch, chars, start);
        } else {
            ((CharJoin) src).getChars(chars, start, off, cch);
        }
    }

    public static String getString(Object src, int off, int cch) {
        assert isValid(src, off, cch);

        if (cch == 0) {
            return "";
        }

        if (src instanceof char[]) {
            return new String((char[]) src, off, cch);
        }

        if (src instanceof String) {
            String s = (String) src;

            if (off == 0 && cch == s.length()) {
                return s;
            }

            return s.substring(off, off + cch);
        }

        StringBuffer sb = new StringBuffer();

        ((CharJoin) src).getString(sb, off, cch);

        return sb.toString();
    }

    public static boolean isWhiteSpace(char ch) {
        switch (ch) {
            case ' ':
            case '\t':
            case '\n':
            case '\r':
                return true;
            default:
                return false;
        }
    }

    public final boolean isWhiteSpace(Object src, int off, int cch) {
        assert isValid(src, off, cch);

        if (cch <= 0) {
            return true;
        }

        if (src instanceof char[]) {
            for (char[] chars = (char[]) src; cch > 0; cch--) {
                if (!isWhiteSpace(chars[off++])) {
                    return false;
                }
            }

            return true;
        }

        if (src instanceof String) {
            for (String s = (String) src; cch > 0; cch--) {
                if (!isWhiteSpace(s.charAt(off++))) {
                    return false;
                }
            }

            return true;
        }

        boolean isWhite = true;

        for (_charIter.init(src, off, cch); _charIter.hasNext(); ) {
            if (!isWhiteSpace(_charIter.next())) {
                isWhite = false;
                break;
            }
        }

        _charIter.release();

        return isWhite;
    }

    public Object stripLeft(Object src, int off, int cch) {
        assert isValid(src, off, cch);

        if (cch > 0) {
            if (src instanceof char[]) {
                char[] chars = (char[]) src;

                while (cch > 0 && isWhiteSpace(chars[off])) {
                    cch--;
                    off++;
                }
            } else if (src instanceof String) {
                String s = (String) src;

                while (cch > 0 && isWhiteSpace(s.charAt(off))) {
                    cch--;
                    off++;
                }
            } else {
                int count = 0;

                for (_charIter.init(src, off, cch); _charIter.hasNext(); count++) {
                    if (!isWhiteSpace(_charIter.next())) {
                        break;
                    }
                }

                _charIter.release();

                off += count;
            }
        }

        if (cch == 0) {
            _offSrc = 0;
            _cchSrc = 0;

            return null;
        }

        _offSrc = off;
        _cchSrc = cch;

        return src;
    }

    public Object stripRight(Object src, int off, int cch) {
        assert isValid(src, off, cch);

        if (cch > 0) {
            for (_charIter.init(src, off, cch, cch); _charIter.hasPrev(); cch--) {
                if (!isWhiteSpace(_charIter.prev())) {
                    break;
                }
            }

            _charIter.release();
        }

        if (cch == 0) {
            _offSrc = 0;
            _cchSrc = 0;

            return null;
        }

        _offSrc = off;
        _cchSrc = cch;

        return src;
    }

    public Object insertChars(
        int posInsert,
        Object src, int off, int cch,
        Object srcInsert, int offInsert, int cchInsert) {
        assert isValid(src, off, cch);
        assert isValid(srcInsert, offInsert, cchInsert);
        assert posInsert >= 0 && posInsert <= cch;

        // TODO - at some point, instead of creating joins, I should
        // normalize all the text into a single buffer to stop large
        // tree's from being built when many modifications happen...

        // TODO - actually, I should see if the size of the new char
        // sequence is small enough to simply allocate a new contigous
        // sequence, either in a common char[] managed by the master,
        // or just create a new string ... this goes for remove chars
        // as well.

        if (cchInsert == 0) {
            _cchSrc = cch;
            _offSrc = off;
            return src;
        }

        if (cch == 0) {
            _cchSrc = cchInsert;
            _offSrc = offInsert;
            return srcInsert;
        }

        _cchSrc = cch + cchInsert;

        Object newSrc;

        if (_cchSrc <= MAX_COPY && canAllocate(_cchSrc)) {
            char[] c = allocate(_cchSrc);

            getChars(c, _offSrc, src, off, posInsert);
            getChars(c, _offSrc + posInsert, srcInsert, offInsert, cchInsert);
            getChars(c, _offSrc + posInsert + cchInsert, src, off + posInsert, cch - posInsert);

            newSrc = c;
        } else {
            _offSrc = 0;

            CharJoin newJoin;

            if (posInsert == 0) {
                newJoin = new CharJoin(srcInsert, offInsert, cchInsert, src, off);
            } else if (posInsert == cch) {
                newJoin = new CharJoin(src, off, cch, srcInsert, offInsert);
            } else {
                CharJoin j = new CharJoin(src, off, posInsert, srcInsert, offInsert);
                newJoin = new CharJoin(j, 0, posInsert + cchInsert, src, off + posInsert);
            }

            if (newJoin._depth > CharJoin.MAX_DEPTH) {
                newSrc = saveChars(newJoin, _offSrc, _cchSrc);
            } else {
                newSrc = newJoin;
            }
        }

        assert isValid(newSrc, _offSrc, _cchSrc);

        return newSrc;
    }

    public Object removeChars(int posRemove, int cchRemove, Object src, int off, int cch) {
        assert isValid(src, off, cch);
        assert posRemove >= 0 && posRemove <= cch;
        assert cchRemove >= 0 && posRemove + cchRemove <= cch;

        Object newSrc;

        _cchSrc = cch - cchRemove;

        if (_cchSrc == 0) {
            newSrc = null;
            _offSrc = 0;
        } else if (posRemove == 0) {
            newSrc = src;
            _offSrc = off + cchRemove;
        } else if (posRemove + cchRemove == cch) {
            newSrc = src;
            _offSrc = off;
        } else {
            int cchAfter = cch - cchRemove;

            if (cchAfter <= MAX_COPY && canAllocate(cchAfter)) {
                char[] chars = allocate(cchAfter);

                getChars(chars, _offSrc, src, off, posRemove);


                getChars(
                    chars, _offSrc + posRemove,
                    src, off + posRemove + cchRemove, cch - posRemove - cchRemove);

                newSrc = chars;
                // _offSrc = _offSrc;
            } else {
                CharJoin j = new CharJoin(src, off, posRemove, src, off + posRemove + cchRemove);

                if (j._depth > CharJoin.MAX_DEPTH) {
                    newSrc = saveChars(j, 0, _cchSrc);
                } else {
                    newSrc = j;
                    _offSrc = 0;
                }
            }
        }

        assert isValid(newSrc, _offSrc, _cchSrc);

        return newSrc;
    }

    private boolean canAllocate(int cch) {
        return _currentBuffer == null || _currentBuffer.length - _currentOffset >= cch;
    }

    private char[] allocate(int cch) {
        assert _currentBuffer == null || _currentBuffer.length - _currentOffset > 0;

        if (_currentBuffer == null) {
            _currentBuffer = new char[Math.max(cch, _charBufSize)];
            _currentOffset = 0;
        }

        _offSrc = _currentOffset;
        _cchSrc = Math.min(_currentBuffer.length - _currentOffset, cch);

        char[] retBuf = _currentBuffer;

        assert _currentOffset + _cchSrc <= _currentBuffer.length;

        if ((_currentOffset += _cchSrc) == _currentBuffer.length) {
            _currentBuffer = null;
            _currentOffset = 0;
        }

        return retBuf;
    }

    public Object saveChars(Object srcSave, int offSave, int cchSave) {
        return saveChars(srcSave, offSave, cchSave, null, 0, 0);
    }

    public Object saveChars(
        Object srcSave, int offSave, int cchSave,
        Object srcPrev, int offPrev, int cchPrev) {
        // BUGBUG (ericvas)
        //
        // There is a severe degenerate situation which can deveol here.  The cases is where
        // there is a long strings of calls to saveChars, where the caller passes in prev text
        // to be prepended.  In this cases, the buffer breaks and a join is made, but because the
        // join is created, subsequent calls willproduce additional joins.  I need to figure
        // out a way that a whole bunch of joins are not created.  I really only want to create
        // joins in situations where large amount of text is manipulated.

        assert isValid(srcSave, offSave, cchSave);
        assert isValid(srcPrev, offPrev, cchPrev);

        // Allocate some space to save the text and copy it there.  This may not allocate all
        // the space I need.  This happens when I run out of buffer space.  Deal with this later.

        char[] srcAlloc = allocate(cchSave);
        int offAlloc = _offSrc;
        int cchAlloc = _cchSrc;

        assert cchAlloc <= cchSave;

        getChars(srcAlloc, offAlloc, srcSave, offSave, cchAlloc);

        Object srcNew;
        int offNew;

        int cchNew = cchAlloc + cchPrev;

        // The prev arguments specify a chunk of text which the caller wants prepended to the
        // text to be saved.  The optimization here is to detect the case where the prev text
        // and the newly allcoated and saved text are adjacent, so that I can avoid copying
        // or joining the two pieces.  The situation where this happens most is when a parser
        // reports a big piece of text in chunks, perhaps because there are entities in the
        // big chunk of text.

        CharJoin j;

        if (cchPrev == 0) {
            srcNew = srcAlloc;
            offNew = offAlloc;
        } else if (srcPrev == srcAlloc && offPrev + cchPrev == offAlloc) {
            //noinspection ConstantConditions
            assert srcPrev instanceof char[];

            srcNew = srcPrev;
            offNew = offPrev;
        } else if (srcPrev instanceof CharJoin && (j = (CharJoin) srcPrev)._srcRight == srcAlloc &&
                   offPrev + cchPrev - j._cchLeft + j._offRight == offAlloc) {
            assert j._srcRight instanceof char[];

            srcNew = srcPrev;
            offNew = offPrev;
        } else {
            j = new CharJoin(srcPrev, offPrev, cchPrev, srcAlloc, offAlloc);

            offNew = 0;
            srcNew = j._depth > CharJoin.MAX_DEPTH ? saveChars(j, 0, cchNew) : j;
        }

        // Now, srcNew and offNew specify the two parts of the triple which has the prev text and
        // part of the text to save (if not all of it).  Here I compute cchMore which is any
        // remaining text which was not allocated for earlier.  Effectively, this code deals with
        // the case where the text to save was greater than the remaining space in the buffer and
        // I need to allocate another buffer to save away the second part and then join the two.

        int cchMore = cchSave - cchAlloc;

        if (cchMore > 0) {
            // If we're here the the buffer got consumed.  So, this time it must allocate a new
            // buffer capable of containing all of the remaining text (no matter how large) and
            // return the beginning part of it.

            srcAlloc = allocate(cchMore);
            offAlloc = _offSrc;
            cchAlloc = _cchSrc;

            assert cchAlloc == cchMore;
            assert offAlloc == 0;

            getChars(srcAlloc, offAlloc, srcSave, offSave + (cchSave - cchMore), cchMore);

            j = new CharJoin(srcNew, offNew, cchNew, srcAlloc, offAlloc);

            offNew = 0;
            cchNew += cchMore;
            srcNew = j._depth > CharJoin.MAX_DEPTH ? saveChars(j, 0, cchNew) : j;
        }

        _offSrc = offNew;
        _cchSrc = cchNew;

        assert isValid(srcNew, _offSrc, _cchSrc);

        return srcNew;
    }

    private static void dumpText(PrintStream o, String s) {
        o.print("\"");

        for (int i = 0; i < s.length(); i++) {
            char ch = s.charAt(i);

            if (i == 36) {
                o.print("...");
                break;
            }

            switch (ch) {
                case '\n':
                    o.print("\\n");
                    break;
                case '\r':
                    o.print("\\r");
                    break;
                case '\t':
                    o.print("\\t");
                    break;
                case '\f':
                    o.print("\\f");
                    break;
                case '"':
                    o.print("\\\"");
                    break;
                default:
                    o.print(ch);
                    break;
            }
        }

        o.print("\"");
    }

    public static void dump(Object src, int off, int cch) {
        dumpChars(System.out, src, off, cch);
        System.out.println();
    }

    public static void dumpChars(PrintStream p, Object src, int off, int cch) {
        p.print("off=" + off + ", cch=" + cch + ", ");

        if (src == null) {
            p.print("");
        } else if (src instanceof String) {
            String s = (String) src;

            p.print("String");

            if (off != 0 || cch != s.length()) {
                if (off < 0 || off > s.length() || off + cch < 0 || off + cch > s.length()) {
                    p.print(" (Error)");
                    return;
                }
            }

            //p.print( ": " );
            dumpText(p, s.substring(off, off + cch));
        } else if (src instanceof char[]) {
            char[] chars = (char[]) src;

            p.print("char[]");

            if (off != 0 || cch != chars.length) {
                if (off < 0 || off > chars.length || off + cch < 0 || off + cch > chars.length) {
                    p.print(" (Error)");
                    return;
                }
            }

            //p.print( ": " );
            dumpText(p, new String(chars, off, cch));
        } else if (src instanceof CharJoin) {
            p.print("CharJoin");

            ((CharJoin) src).dumpChars(p, off, cch);
        } else {
            p.print("Unknown text source");
        }
    }

    public static boolean isValid(Object src, int off, int cch) {
        if (cch < 0 || off < 0) {
            return false;
        }

        if (src == null) {
            return off == 0 && cch == 0;
        }

        if (src instanceof char[]) {
            char[] c = (char[]) src;
            return off <= c.length && off + cch <= c.length;
        }

        if (src instanceof String) {
            String s = (String) src;
            return off <= s.length() && off + cch <= s.length();
        }

        if (src instanceof CharJoin) {
            return ((CharJoin) src).isValid(off, cch);
        }

        return false;
    }

    //
    // Private stuff
    //

    public static final class CharJoin {
        public CharJoin(
            Object srcLeft, int offLeft, int cchLeft, Object srcRight, int offRight) {
            _srcLeft = srcLeft;
            _offLeft = offLeft;
            _cchLeft = cchLeft;
            _srcRight = srcRight;
            _offRight = offRight;

            int depth = 0;

            if (srcLeft instanceof CharJoin) {
                depth = ((CharJoin) srcLeft)._depth;
            }

            if (srcRight instanceof CharJoin) {
                int rightDepth = ((CharJoin) srcRight)._depth;

                if (rightDepth > depth) {
                    depth = rightDepth;
                }
            }

            _depth = depth + 1;

            assert _depth <= MAX_DEPTH + 2;
        }

        private int cchRight(int off, int cch) {
            return Math.max(0, cch - _cchLeft - off);
        }

        public int depth() {
            int depth = 0;

            if (_srcLeft instanceof CharJoin) {
                depth = ((CharJoin) _srcLeft).depth();
            }

            if (_srcRight instanceof CharJoin) {
                depth = Math.max(((CharJoin) _srcRight).depth(), depth);
            }

            return depth + 1;
        }

        public boolean isValid(int off, int cch) {
            // Deep trees cause this to take forever

            if (_depth > 2) {
                return true;
            }

            assert _depth == depth();

            if (off < 0 || cch < 0) {
                return false;
            }

            if (!CharUtil.isValid(_srcLeft, _offLeft, _cchLeft)) {
                return false;
            }

            return CharUtil.isValid(_srcRight, _offRight, cchRight(off, cch));
        }

        private void getString(StringBuffer sb, int off, int cch) {
            assert cch > 0;

            if (off < _cchLeft) {
                int cchL = Math.min(_cchLeft - off, cch);

                CharUtil.getString(sb, _srcLeft, _offLeft + off, cchL);

                if (cch > cchL) {
                    CharUtil.getString(sb, _srcRight, _offRight, cch - cchL);
                }
            } else {
                CharUtil.getString(sb, _srcRight, _offRight + off - _cchLeft, cch);
            }
        }

        private void getChars(char[] chars, int start, int off, int cch) {
            assert cch > 0;

            if (off < _cchLeft) {
                int cchL = Math.min(_cchLeft - off, cch);

                CharUtil.getChars(chars, start, _srcLeft, _offLeft + off, cchL);

                if (cch > cchL) {
                    CharUtil.getChars(chars, start + cchL, _srcRight, _offRight, cch - cchL);
                }
            } else {
                CharUtil.getChars(chars, start, _srcRight, _offRight + off - _cchLeft, cch);
            }
        }

        private void dumpChars(PrintStream p, int off, int cch) {
            p.print("( ");
            CharUtil.dumpChars(p, _srcLeft, _offLeft, _cchLeft);
            p.print(", ");
            CharUtil.dumpChars(p, _srcRight, _offRight, cchRight(off, cch));
            p.print(" )");
        }

        //
        //
        //

        public final Object _srcLeft;
        public final int _offLeft;
        public final int _cchLeft;

        public final Object _srcRight;
        public final int _offRight;

        public final int _depth;

        static final int MAX_DEPTH = 64;
    }

    //
    //
    //

    public final static class CharIterator {
        public void init(Object src, int off, int cch) {
            init(src, off, cch, 0);
        }

        public void init(Object src, int off, int cch, int startPos) {
            assert isValid(src, off, cch);

            release();

            _srcRoot = src;
            _offRoot = off;
            _cchRoot = cch;

            _minPos = _maxPos = -1;

            movePos(startPos);
        }

        public void release() {
            _srcRoot = null;
            _srcLeafString = null;
            _srcLeafChars = null;
        }

        public boolean hasNext() {
            return _pos < _cchRoot;
        }

        public boolean hasPrev() {
            return _pos > 0;
        }

        public char next() {
            assert hasNext();

            char ch = currentChar();

            movePos(_pos + 1);

            return ch;
        }

        public char prev() {
            assert hasPrev();

            movePos(_pos - 1);

            return currentChar();
        }

        public void movePos(int newPos) {
            assert newPos >= 0 && newPos <= _cchRoot;

            if (newPos < _minPos || newPos > _maxPos) {
                // if newPos out of cached leaf, recache new leaf
                Object src = _srcRoot;
                int off = _offRoot + newPos;
                int cch = _cchRoot;

                for (_offLeaf = _offRoot; src instanceof CharJoin; ) {
                    CharJoin j = (CharJoin) src;

                    if (off < j._cchLeft) {
                        src = j._srcLeft;
                        _offLeaf = j._offLeft;
                        off = off + j._offLeft;
                        cch = j._cchLeft;
                    } else {
                        src = j._srcRight;
                        _offLeaf = j._offRight;
                        off = off - (j._cchLeft - j._offRight);
                        cch = cch - j._cchLeft;
                    }
                }

//                _offLeaf = off - Math.min( off - _offLeaf, newPos );
                _minPos = newPos - (off - _offLeaf);
//                _maxPos = newPos + Math.min( _cchRoot - newPos, sizeof( src ) - off );
                _maxPos = _minPos + cch;

                if (newPos < _cchRoot) {
                    _maxPos--;
                }

                // Cache the leaf src to avoid instanceof for every char

                _srcLeafChars = null;
                _srcLeafString = null;

                if (src instanceof char[]) {
                    _srcLeafChars = (char[]) src;
                } else {
                    _srcLeafString = (String) src;
                }

                assert newPos >= _minPos && newPos <= _maxPos;
            }

            _pos = newPos;
        }

        private char currentChar() {
            int i = _offLeaf + _pos - _minPos;

            return _srcLeafChars == null ? _srcLeafString.charAt(i) : _srcLeafChars[i];
        }

        private Object _srcRoot; // Original triple
        private int _offRoot;
        private int _cchRoot;

        private int _pos;     // Current position

        private int _minPos;  // Min/max poses for current cached leaf
        private int _maxPos;

        private int _offLeaf;

        private String _srcLeafString;  // Cached leaf - either a char[] or a string
        private char[] _srcLeafChars;
    }

    private static final int CHARUTIL_INITIAL_BUFSIZE = 1024 * 32;
    private static final ThreadLocal> tl_charUtil =
        ThreadLocal.withInitial(() -> new SoftReference<>(new CharUtil(CHARUTIL_INITIAL_BUFSIZE)));

    public static void clearThreadLocals() {
        tl_charUtil.remove();
    }

    private final CharIterator _charIter = new CharIterator();

    // TODO - 64 is kinda arbitrary.  Perhaps it should be configurable.
    private static final int MAX_COPY = 64;

    // Current char buffer we're allcoating new chars to

    private final int _charBufSize;
    private int _currentOffset;
    private char[] _currentBuffer;

    // These members are used to communicate offset and character count
    // information back to a caller of various methods on CharUtil.
    // Usually, the methods returns the src Object, and these two hold
    // the offset and the char count.

    public int _offSrc;
    public int _cchSrc;
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy