All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.jruby.util.ByteList Maven / Gradle / Ivy

/***** BEGIN LICENSE BLOCK *****
 * Version: EPL 1.0/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Eclipse Public
 * License Version 1.0 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of
 * the License at http://www.eclipse.org/legal/epl-v10.html
 *
 * Software distributed under the License is distributed on an "AS
 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * rights and limitations under the License.
 *
 * Copyright (C) 2007-2010 JRuby Community
 * Copyright (C) 2007 Charles O Nutter 
 * Copyright (C) 2007 Nick Sieger 
 * Copyright (C) 2007 Ola Bini 
 * Copyright (C) 2007 William N Dortch 
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either of the GNU General Public License Version 2 or later (the "GPL"),
 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the EPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the EPL, the GPL or the LGPL.
 ***** END LICENSE BLOCK *****/
package org.jruby.util;

import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;

import org.jcodings.Encoding;
import org.jcodings.ascii.AsciiTables;
import org.jcodings.specific.ASCIIEncoding;

/**
 * ByteList is simple a collection of bytes in the same way a Java String is a collection
 * of characters. However, its API resembles StringBuffer/StringBuilder more than String
 * because it is a mutable object.
 */
@SuppressWarnings("deprecation")
public class ByteList implements Comparable, CharSequence, Serializable {
    private static final long serialVersionUID = -1286166947275543731L;

    public static final byte[] NULL_ARRAY = new byte[0];
    public static final ByteList EMPTY_BYTELIST = new ByteList(NULL_ARRAY, false);

    // NOTE: AR-JDBC (still) uses these fields directly in its ext .java parts  ,
    // until there's new releases we shall keep them public and maybe review other exts using BL's API
    @Deprecated
    public byte[] bytes;
    @Deprecated
    public int begin;
    @Deprecated
    public int realSize;

    private Encoding encoding = ASCIIEncoding.INSTANCE;

    transient int hash;

    transient String stringValue;

    private static final int DEFAULT_SIZE = 4;

    /**
     * Creates a new instance of ByteList
     */
    public ByteList() {
        this(DEFAULT_SIZE);
    }

    /**
     * Creates a new instance of Bytelist with a pre-allocated size.  If you know the size ahead
     * of time this saves additional array allocations to grow the bytelist to the proper size.
     *
     * @param size to preallocate the bytelist to
     */
    public ByteList(int size) {
        bytes = new byte[size];
        realSize = 0;
    }

    /**
     * Create a new instance of ByteList with the bytes supplied using the specified encoding.
     *
     * Important: bytes is used as the initial backing store for the bytelist.  Over time as the
     * bytelist is mutated this backing store may be replaced with a new one to hold the additional
     * bytes.  If you pass in bytes and then modify the contents of the original bytes, then those
     * changes will get reflected.
     *
     * @param bytes to use
     * @param encoding
     */
    // TODO: Deprecate and replace with a static method which implies the caveats of this constructor.
    public ByteList(byte[] bytes, Encoding encoding) {
        this.bytes = bytes;
        this.realSize = bytes.length;
        this.encoding = safeEncoding(encoding);
    }

    /**
     * Create a new instance of ByteList with the contents of wrap.  This constructor will make
     * a copy of bytes passed
     *
     * @param wrap the initial bytes for this ByteList
     */
    public ByteList(byte[] wrap) {
        this(wrap, true);
    }

    /**
     * Create a new instance of ByteList with the contents of wrap.  If copy is true then it will
     * array copy the contents.  Otherwise it will use the byte array passed in as its initial
     * backing store.
     *
     * @param wrap the initial bytes for this ByteList
     * @param copy whether to arraycopy wrap for the backing store or not
     */
    public ByteList(byte[] wrap, boolean copy) {
        this(wrap, ASCIIEncoding.INSTANCE, copy);
    }

    /**
     * Create a new instance of ByteList with the contents of wrap.  If copy is true then it will
     * array copy the contents.  Otherwise it will use the byte array passed in as its initial
     * backing store.
     *
     * @param wrap the initial bytes for this ByteList
     * @param encoding the encoding for the bytes
     * @param copy whether to arraycopy wrap for the backing store or not
     */
    public ByteList(byte[] wrap, Encoding encoding, boolean copy) {
        assert wrap != null;
        if (copy) {
            this.bytes = wrap.clone();
        } else {
            this.bytes = wrap;
        }
        this.realSize = wrap.length;
        this.encoding = safeEncoding(encoding);
    }

    /**
     * Create a new instance of byte list with the same contents as the passed in ByteList wrap.
     * Note that this does array copy the data for the new objects initial backing store.
     *
     * @param wrap is contents for new ByteList
     */
    public ByteList(ByteList wrap) {
        this(wrap.bytes, wrap.begin, wrap.realSize, wrap.encoding, true);
    }

    /**
     * Create a new instance of ByteList using wrap as a backing store where index is the first
     * index in the byte array where the data starts and len indicates how long the data portion
     * of the bytelist is.  wrap will be array copied in this constructor.
     *
     * @param wrap the bytes to use
     * @param index where in the bytes the data starts
     * @param len how long the data is in the wrap array
     */
    public ByteList(byte[] wrap, int index, int len) {
        this(wrap, index, len, true);
    }

    /**
     * Create a new instance of ByteList using wrap as a backing store where index is the first
     * index in the byte array where the data starts and len indicates how long the data portion
     * of the bytelist is.  wrap will be array copied if copy is true OR if index != 0.
     *
     * @param wrap the bytes to use
     * @param index where in the bytes the data starts
     * @param len how long the data is in the wrap array
     * @param copy if true array copy wrap. otherwise use as backing store
     */
    public ByteList(byte[] wrap, int index, int len, boolean copy) {
        this(wrap, index, len, ASCIIEncoding.INSTANCE, copy);
    }

    /**
     * Create a new instance of ByteList using wrap as a backing store where index is the first
     * index in the byte array where the data starts and len indicates how long the data portion
     * of the bytelist is.  wrap will be array copied if copy is true OR if index != 0.
     *
     * @param wrap the bytes to use
     * @param index where in the bytes the data starts
     * @param len how long the data is in the wrap array
     * @param copy if true array copy wrap. otherwise use as backing store
     */
    public ByteList(byte[] wrap, int index, int len, Encoding encoding, boolean copy) {
        assert wrap != null : "'wrap' must not be null";
        assert index >= 0 && index <= wrap.length : "'index' is not without bounds of 'wrap' array";
        assert wrap.length >= index + len : "'index' + 'len' is longer than the 'wrap' array";

        if (copy) {
            bytes = new byte[len];
            System.arraycopy(wrap, index, bytes, 0, len);
        } else {
            begin = index;
            bytes = wrap;
        }
        realSize = len;
        this.encoding = safeEncoding(encoding);
    }

    /**
     * Create a new instance of ByteList using wrap as a backing store where index is the first
     * index in the byte array where the data starts and len indicates how long the data portion
     * of the bytelist is.  wrap's byte array will be array copied for initial backing store.
     *
     * @param wrap the bytes to use
     * @param index where in the bytes the data starts
     * @param len how long the data is in the wrap array
     */
    public ByteList(ByteList wrap, int index, int len) {
        this(wrap.bytes, wrap.begin + index, len);
        encoding = wrap.encoding;
    }

    /**
     * Delete len bytes from start index.  This does no bullet-proofing so it is your
     * responsibility to ensure you do not run off the backing store array.
     *
     * @param start index to delete from
     * @param len number of bytes to delete
     */
    public void delete(int start, int len) {
        assert start >= begin && start < realSize : "'start' is at invalid index";
        assert len >= 0 : "'len' must be positive";
        assert start + len <= begin + realSize : "too many bytes requested";

        realSize -= len;

        System.arraycopy(bytes, start + len, bytes, start, realSize);
        invalidate();
    }

    /**
     * Append the byte b up to len times onto the end of the current ByteList.
     *
     * @param b is byte to be appended
     * @param len is number of times to repeat the append
     */
    // FIXME: Innefficient impl since we know the len up front.
    public void fill(int b, int len) {
        for ( ; --len >= 0; ) {
            append(b);
        }
        invalidate();
    }

    /**
     * @see Object#clone()
     */
    @Override
    public Object clone() {
        return dup();
    }

    /**
     * creates a duplicate of this bytelist but only in the case of a stringValue and its resulting
     * hash value.  No other elements are duplicated.
     */
    public ByteList dup() {
        ByteList dup = dup(realSize);
        dup.hash = hash;
        dup.stringValue = stringValue;
        return dup;
    }

    /**
     * Create a new ByteList but do not array copy the byte backing store.
     *
     * @return a new ByteList with same backing store
     */
    public ByteList shallowDup() {
        ByteList dup = new  ByteList(bytes, false);
        dup.realSize = realSize;
        dup.begin = begin;
        dup.encoding = safeEncoding(encoding);
        dup.hash = hash;
        dup.stringValue = stringValue;
        return dup;
    }

    /**
     * @param length is the value of how big the buffer is going to be, not the actual length to copy
     *
     * It is used by RubyString.modify(int) to prevent COW pathological situations
     * (namely to COW with having length - realSize bytes ahead)
     */
    public ByteList dup(int length) {
        ByteList dup = new ByteList(length);

        dup.append(this.bytes, this.begin, this.realSize);
        dup.encoding = safeEncoding(encoding);

        return dup;
    }

    /**
     * Ensure that the bytelist is at least length bytes long.  Otherwise grow the backing store
     * so that it is length bytes long
     *
     * @param length to use to make sure ByteList is long enough
     */
    public void ensure(int length) {
        if (begin + length > bytes.length) {
            byte[] tmp = new byte[Math.min(Integer.MAX_VALUE, length + (length >>> 1))];
            System.arraycopy(bytes, begin, tmp, 0, realSize);
            bytes = tmp;
            begin = 0;
            invalidate();
        }
    }

    /**
     * Make a shared copy of this ByteList.  This is used for COW'ing ByteLists, you typically
     * want a piece of the same backing store to be shared across ByteBuffers, while those
     * ByteLists will be pointing at different indexes and lengths of the same backing store.
     *
     * Note: that this does not update hash or stringValue.
     *
     * @param index new begin value for shared ByteBuffer
     * @param len new length/realSize for chared
     * @return
     */
    public ByteList makeShared(int index, int len) {
        ByteList shared = new ByteList(bytes, encoding);

        shared.realSize = len;
        shared.begin = begin + index;

        return shared;
    }

    /**
     * Change ByteBuffer to have a new begin that is +index positions past begin with a new length.
     *
     * @param index new value to add to begin
     * @param len the new realSize/length value
     */
    public void view(int index, int len) {
        realSize = len;
        begin = begin + index;
        invalidate();
    }

    /**
     * Array copy the byte backing store so that you can guarantee that no other objects are
     * referencing this objects backing store.
     */
    public void unshare() {
        unshare(realSize);
    }

    /**
     * Array copy the byte backing store so that you can guarantee that no other objects are
     * referencing this objects backing store.  This version on unshare allows a length to be
     * specified which will copy length bytes from the old backing store.
     *
     * @param length is the value of how big the buffer is going to be, not the actual length to copy
     *
     * It is used by RubyString.modify(int) to prevent COW pathological situations
     * (namely to COW with having length - realSize bytes ahead)
     */
    public void unshare(int length) {
        byte[] tmp = new byte[length];
        System.arraycopy(bytes, begin, tmp, 0, Math.min(realSize, length));
        bytes = tmp;
        begin = 0;
    }

    /**
     * Invalidate the hash and stringValue which may have been cached in this ByteList.
     */
    public void invalidate() {
        hash = 0;
        stringValue = null;
    }

    /**
     * Prepend a byte onto the front of this ByteList.
     *
     * @param b is the byte to be prepended
     */
    public void prepend(byte b) {
        grow(1);
        System.arraycopy(bytes, begin + 0, bytes, begin + 1, realSize);
        bytes[begin + 0] = b;
        realSize++;
        invalidate();
    }

    /**
     * Append a single byte to the ByteList
     *
     * @param b the byte to be added
     * @return this instance
     */
    public ByteList append(byte b) {
        grow(1);
        bytes[begin + realSize] = b;
        realSize++;
        invalidate();
        return this;
    }

    /**
     * Append a single int to the ByteList
     *
     * @param b the int to be added
     * @return this instance
     */
    public ByteList append(int b) {
        append((byte)b);
        return this;
    }

    /**
     * Append up to length bytes from InputStream to the ByteList.   If no bytes are read from the
     * stream then throw an IOException.
     *
     * @param input the stream to read bytes from
     * @param length how many byte to try and read
     * @return this instance
     * @throws IOException when no bytes are read
     */
    public ByteList append(InputStream input, int length) throws IOException {
        grow(length);
        int read = 0;
        int n;
        int start = begin + realSize;
        while (read < length) {
            n = input.read(bytes, start + read, length - read);
            if (n == -1) {
                if (read == 0) throw new java.io.EOFException();
                break;
            }
            read += n;
        }

        realSize += read;
        invalidate();
        return this;
    }

    /**
     * Append contents of the supplied nio ByteList up to len length onto the end of this
     * ByteBuffer.
     *
     * @param buffer to be appended
     * @param len is number of bytes you hoping to get from the ByteBuffer
     */
    public void append(ByteBuffer buffer, int len) {
        grow(len);
        buffer.get(bytes, begin + realSize, len);
        realSize += len;
        invalidate();
    }

    /**
     * Append moreBytes onto the end of the current ByteList.
     *
     * @param moreBytes to be added.
     */
    public void append(byte[] moreBytes) {
        assert moreBytes != null : "moreBytes is null";

        grow(moreBytes.length);
        System.arraycopy(moreBytes, 0, bytes, begin + realSize, moreBytes.length);
        realSize += moreBytes.length;
        invalidate();
    }

    /**
     * Append moreBytes onto the end of the current ByteList.
     *
     * @param moreBytes to be added.
     */
    public void append(ByteList moreBytes) {
        append(moreBytes.bytes, moreBytes.begin, moreBytes.realSize);
    }

    /**
     * Append moreBytes onto the end of the current ByteList with +index as the new begin for
     * len bytes from the moreBytes ByteList.
     *
     * @param moreBytes to be added.
     * @param index new index past current begin value
     * @param len is the number of bytes to append from source ByteList
     */
    public void append(ByteList moreBytes, int index, int len) {
        append(moreBytes.bytes, moreBytes.begin + index, len);
    }

    /**
     * Append moreBytes onto the end of the current ByteList with start as the new begin for
     * len bytes from the moreBytes byte array.
     *
     * @param moreBytes to be added.
     * @param start is the new begin value
     * @param len is the number of bytes to append from source byte array
     */
    public void append(byte[] moreBytes, int start, int len) {
        assert moreBytes != null : "moreBytes is null";
        assert start >= 0 && (start == 0 || start < moreBytes.length) : "Invalid start: " + start;
        assert len >= 0 && moreBytes.length - start >= len : "Bad length: " + len;

        grow(len);
        System.arraycopy(moreBytes, start, bytes, begin + realSize, len);
        realSize += len;
        invalidate();
    }

    /**
     * Resize the ByteList's backing store to be length in size.  Note that this forces the backing
     * store to array copy regardless of ByteLists current size or contents.  It essentially will
     * end any COWing.
     *
     * @param length the new length for the backing store.
     */
    public void realloc(int length) {
        assert length >= 0 : "Invalid length";
        assert length >= realSize : "length is too small";

        byte tmp[] = new byte[length];
        System.arraycopy(bytes, 0, tmp, 0, realSize);
        bytes = tmp;
        invalidate();
    }

    /**
     * Return the current length of the ByteList.
     *
     * @return the number of bytes in this ByteList.
     */
    public int length() {
        return realSize;
    }

    /**
     * Return true if the ByteList has zero length (byte length).
     *
     * @return true if empty, false otherwise
     */
    public boolean isEmpty() {
        return realSize == 0;
    }

    // ENEBO: Wow...what happens if newLength < realSize...nasty shrinkage?
    /**
     * grow the bytelist to be newLength in size.
     *
     * @param newLength
     */
    public void length(int newLength) {
//        assert newLength >= realSize : "newLength is too small";

        grow(newLength - realSize);
        realSize = newLength;
    }

    /**
     * Number of characters in this ByteList based on its current encoding.
     *
     * @return number of characters
     */
    public int lengthEnc() {
        return encoding.strLength(bytes, begin, begin + realSize);
    }

    /**
     * Get the byte at index from the ByteList.
     *
     * @param index to retreive byte from
     * @return the byte retreived
     */
    public int get(int index) {
        assert index >= 0 : "index must be positive";

        return bytes[begin + index] & 0xFF;
    }

    /**
     *  Get the index code point in this ByteList.
     *
     * @param index is the element you want
     * @return the element you requested
     */
    public int getEnc(int index) {
        return encoding.strCodeAt(bytes, begin, begin + realSize, index);
    }

    /**
     * Set the byte at index to be new value.
     *
     * @param index to set byte
     * @param b is the new value.
     */
    public void set(int index, int b) {
        assert index >= 0 : "index must be positive";
        assert begin + index < begin + realSize : "index is too large";

        bytes[begin + index] = (byte)b;
        invalidate();
    }

    /**
     * Unsafe version of replace(int,int,ByteList). The contract is that these
     * unsafe versions will not make sure thet beg and len indices are correct.
     */
    public void unsafeReplace(int beg, int len, ByteList nbytes) {
        unsafeReplace(beg, len, nbytes.bytes, nbytes.begin, nbytes.realSize);
    }

    /**
     * Unsafe version of replace(int,int,byte[]). The contract is that these
     * unsafe versions will not make sure thet beg and len indices are correct.
     */
    public void unsafeReplace(int beg, int len, byte[] buf) {
        unsafeReplace(beg, len, buf, 0, buf.length);
    }

    /**
     * Unsafe version of replace(int,int,byte[],int,int). The contract is that these
     * unsafe versions will not make sure thet beg and len indices are correct.
     */
    public void unsafeReplace(int beg, int len, byte[] nbytes, int index, int count) {
        grow(count - len);
        int newSize = realSize + count - len;
        System.arraycopy(bytes, beg+len, bytes, beg+count, realSize - (len + beg));
        System.arraycopy(nbytes, index, bytes, beg, count);
        realSize = newSize;
        invalidate();
    }

    /**
     * Replace all bytes in this ByteList with bytes from the given array.
     *
     * @param source bytes to use for replacement
     */
    public void replace(byte[] source) {
        replace(0, realSize, source, 0, source.length);
    }

    /**
     * Replace a region of bytes in this ByteList with bytes from the given ByteList.
     *
     * @param targetOff offset of target region in this ByteList
     * @param targetLen length of target region in this ByteList
     * @param source ByteList to use for replacement
     */
    public void replace(int targetOff, int targetLen, ByteList source) {
        replace(targetOff, targetLen, source.bytes, source.begin, source.realSize);
    }

    /**
     * Replace a region of bytes in this ByteList with bytes from the given array.
     *
     * @param targetOff offset of target region in this ByteList
     * @param targetLen length of target region in this ByteList
     * @param source bytes to use for replacement
     */
    public void replace(int targetOff, int targetLen, byte[] source) {
        replace(targetOff, targetLen, source, 0, source.length);
    }

    /**
     * Replace a region of bytes in this ByteList with a region of bytes from the given array.
     *
     * @param targetOff offset of target region in this ByteList
     * @param targetLen length of target region in this ByteList
     * @param source bytes to use for replacement
     * @param sourceOff offset of source region in the replacement bytes
     * @param sourceLen length of source region in the replacement bytes
     */
    public void replace(int targetOff, int targetLen, byte[] source, int sourceOff, int sourceLen) {
        int newSize = realSize - targetLen + sourceLen;
        ensure(newSize);
        int tailSize = realSize - (targetLen + targetOff);
        if (tailSize != 0) {
            System.arraycopy(bytes,begin+targetOff+targetLen,bytes,begin+targetOff+sourceLen,tailSize);
        }
        if (sourceLen != 0) {
            System.arraycopy(source,sourceOff,bytes,begin+targetOff,sourceLen);
        }
        realSize = newSize;
        invalidate();
    }

    public void insert(int index, int b) {
        grow(1);
        System.arraycopy(bytes, index, bytes, index + 1, realSize - index);
        bytes[index] = (byte)b;
        realSize++;
        invalidate();
    }

    /**
     * Get the index of first occurrence of c in ByteList from the beginning of the ByteList.
     *
     * @param c byte to be looking for
     * @return the index of the byte or -1 if not found
     */
    public int indexOf(int c) {
        return indexOf(c, 0);
    }

    /**
     * Get the index of first occurrence of c in ByteList from the pos offset of the ByteList.
     *
     * @param c byte to be looking for
     * @param pos off set from beginning of ByteList to look for byte
     * @return the index of the byte or -1 if not found
     */
    public int indexOf(final int c, int pos) {
        // not sure if this is checked elsewhere,
        // didn't see it in RubyString. RubyString does
        // cast to char, so c will be >= 0.
        if (c > 255)
            return -1;
        final byte b = (byte)(c&0xFF);
        final int size = begin + realSize;
        final byte[] buf = bytes;
        pos += begin;
        for ( ; pos < size && buf[pos] != b ; pos++ ) ;
        return pos < size ? pos - begin : -1;
    }

    /**
     * Get the index of first occurrence of Bytelist find in this ByteList.
     *
     * @param find the ByteList to find
     * @return the index of the byte or -1 if not found
     */
    public int indexOf(ByteList find) {
        return indexOf(find, 0);
    }

    /**
     * Get the index of first occurrence of Bytelist find in this ByteList starting at index i.
     *
     * @param find the ByteList to find
     * @param i the index to start from
     * @return the index of the byte or -1 if not found
     */
    public int indexOf(ByteList find, int i) {
        return indexOf(bytes, begin, realSize, find.bytes, find.begin, find.realSize, i);
    }

    /**
     * Get the index of first occurrence of target in source using the offset and count parameters.
     * fromIndex can be used to start beyond zero on source.
     *
     * @return the index of the byte or -1 if not found
     */
    static int indexOf(byte[] source, int sourceOffset, int sourceCount, byte[] target, int targetOffset, int targetCount, int fromIndex) {
        if (fromIndex >= sourceCount) return (targetCount == 0 ? sourceCount : -1);
        if (fromIndex < 0) fromIndex = 0;
        if (targetCount == 0) return fromIndex;

        byte first  = target[targetOffset];
        int max = sourceOffset + (sourceCount - targetCount);

        for (int i = sourceOffset + fromIndex; i <= max; i++) {
            if (source[i] != first) while (++i <= max && source[i] != first);

            if (i <= max) {
                int j = i + 1;
                int end = j + targetCount - 1;
                for (int k = targetOffset + 1; j < end && source[j] == target[k]; j++, k++);

                if (j == end) return i - sourceOffset;
            }
        }
        return -1;
    }

    /**
     * Get the index of last occurrence of c in ByteList from the end of the ByteList.
     *
     * @param c byte to be looking for
     * @return the index of the byte or -1 if not found
     */
    public int lastIndexOf(int c) {
        return lastIndexOf(c, realSize - 1);
    }

    /**
     * Get the index of last occurrence of c in ByteList from the pos offset of the ByteList.
     *
     * @param c byte to be looking for
     * @param pos off set from end of ByteList to look for byte
     * @return the index of the byte or -1 if not found
     */
    public int lastIndexOf(final int c, int pos) {
        // not sure if this is checked elsewhere,
        // didn't see it in RubyString. RubyString does
        // cast to char, so c will be >= 0.
        if (c > 255) return -1;

        final byte b = (byte)(c&0xFF);
        final int size = begin + realSize;
        pos += begin;
        final byte[] buf = bytes;
        if (pos >= size) {
            pos = size;
        } else {
            pos++;
        }
        for ( ; --pos >= begin && buf[pos] != b ; ) ;
        return pos - begin;
    }

    /**
     * Get the index of last occurrence of find in ByteList from the end of the ByteList.
     *
     * @param find ByteList to be looking for
     * @return the index of the byte or -1 if not found
     */
    public int lastIndexOf(ByteList find) {
        return lastIndexOf(find, realSize);
    }

    /**
     * Get the index of last occurrence of find in ByteList from the end of the ByteList.
     *
     * @param find ByteList to be looking for
     * @param pos index from end of list to search from
     * @return the index of the byte or -1 if not found
     */
    public int lastIndexOf(ByteList find, int pos) {
        return lastIndexOf(bytes, begin, realSize, find.bytes, find.begin, find.realSize, pos);
    }

    /**
     * Get the index of last occurrence of target in source using the offset and count parameters.
     * fromIndex can be used to start beyond zero on source.
     *
     * @return the index of the byte or -1 if not found
     */
    static int lastIndexOf(byte[] source, int sourceOffset, int sourceCount, byte[] target, int targetOffset, int targetCount, int fromIndex) {
        int rightIndex = sourceCount - targetCount;
        if (fromIndex < 0) return -1;
        if (fromIndex > rightIndex) fromIndex = rightIndex;
        if (targetCount == 0) return fromIndex;

        int strLastIndex = targetOffset + targetCount - 1;
        byte strLastChar = target[strLastIndex];
        int min = sourceOffset + targetCount - 1;
        int i = min + fromIndex;

        startSearchForLastChar:
        while (true) {
            while (i >= min && source[i] != strLastChar) i--;
            if (i < min) return -1;
            int j = i - 1;
            int start = j - (targetCount - 1);
            int k = strLastIndex - 1;

            while (j > start) {
                if (source[j--] != target[k--]) {
                    i--;
                    continue startSearchForLastChar;
                }
            }
            return start - sourceOffset + 1;
        }
    }

    public boolean startsWith(ByteList other, int toffset) {
        if (realSize == 0 || this.realSize < other.realSize + toffset) return false;

        byte[]ta = bytes;
        int to = begin + toffset;
        byte[]pa = other.bytes;
        int po = other.begin;
        int pc = other.realSize;

        while (--pc >= 0) if (ta[to++] != pa[po++]) return false;
        return true;
    }

    /**
     * Does this ByteList start with the supplied ByteList?
     *
     * @param other is the bytelist to compare with
     * @return true is this ByteList starts with other
     */
    public boolean startsWith(ByteList other) {
        return startsWith(other, 0);
    }

    /**
     * Does this ByteList end with the supplied ByteList?
     *
     * @param other is the bytelist to compare with
     * @return true is this ByteList starts with other
     */
    public boolean endsWith(ByteList other) {
        return startsWith(other, realSize - other.realSize);
    }

    /**
     * Does this ByteList equal the other ByteList?
     *
     * @param other is the bytelist to compare with
     * @return true is this ByteList is the same
     */
    @Override
    public boolean equals(Object other) {
        if (other instanceof ByteList) return equal((ByteList)other);
        return false;
    }

    /**
     * Does this ByteList equal the other ByteList?
     *
     * @param other is the bytelist to compare with
     * @return true is this ByteList is the same
     */
    public boolean equal(ByteList other) {
        if (other == this) return true;
        if (hash != 0 && other.hash != 0 && hash != other.hash) return false;

        int first, last;
        if ((last = realSize) == other.realSize) {
            byte buf[] = bytes;
            byte otherBuf[] = other.bytes;
            // scanning from front and back simultaneously, meeting in
            // the middle. the object is to get a mismatch as quickly as
            // possible. alternatives might be: scan from the middle outward
            // (not great because it won't pick up common variations at the
            // ends until late) or sample odd bytes forward and even bytes
            // backward (I like this one, but it's more expensive for
            // strings that are equal; see sample_equals below).

            for (first = -1;
                 --last > first && buf[begin + last] == otherBuf[other.begin + last] &&
                         ++first < last && buf[begin + first] == otherBuf[other.begin + first] ; ) ;
            return first >= last;
        }
        return false;
    }

    /**
     * an alternative to the new version of equals, should
     * detect inequality faster (in many cases), but is slow
     * in the case of equal values (all bytes visited), due to
     * using n+=2, n-=2 vs. ++n, --n while iterating over the array.
     */
    public boolean sample_equals(Object other) {
        if (other == this) return true;
        if (other instanceof ByteList) {
            ByteList b = (ByteList) other;
            int first;
            int last;
            int size;
            byte[] buf;
            if ((size = realSize) == b.realSize) {
                // scanning from front and back simultaneously, sampling odd
                // bytes on the forward iteration and even bytes on the
                // reverse iteration. the object is to get a mismatch as quickly
                // as possible.
                for (buf = bytes, first = -1, last = (size + 1) & ~1 ;
                     (last -= 2) >= 0 && buf[begin + last] == b.bytes[b.begin + last] &&
                             (first += 2) < size && buf[begin + first] == b.bytes[b.begin + first] ; ) ;
                return last < 0 || first == size;
            }
        }
        return false;
    }

    /**
     * This comparison matches MRI comparison of Strings (rb_str_cmp).
     * I wish we had memcmp right now...
     */
    public int compareTo(Object other) {
        return cmp((ByteList)other);
    }

    /**
     * This comparison matches MRI comparison of Strings (rb_str_cmp).
     */
    public int cmp(final ByteList other) {
        if (other == this) return 0;
        final int size = realSize;
        final int len =  Math.min(size, other.realSize);
        int offset = -1;
        // a bit of VM/JIT weirdness here: though in most cases
        // performance is improved if array references are kept in
        // a local variable (saves an instruction per access, as I
        // [slightly] understand it), in some cases, when two (or more?)
        // arrays are being accessed, the member reference is actually
        // faster.  this is one of those cases...
        for (  ; ++offset < len && bytes[begin + offset] == other.bytes[other.begin + offset]; ) ;
        if (offset < len) {
            return (bytes[begin + offset]&0xFF) > (other.bytes[other.begin + offset]&0xFF) ? 1 : -1;
        }
        return size == other.realSize ? 0 : size == len ? -1 : 1;
    }

    /**
     * Do a case insensitive comparison with other ByteList with return types similiar to compareTo.
     *
     * @param other the ByteList to compare
     * @return -1, 0, or 1
     */
    public int caseInsensitiveCmp(final ByteList other) {
        if (other == this) return 0;

        final int size = realSize;
        final int len =  Math.min(size, other.realSize);
        final int other_begin = other.begin;
        final byte[] other_bytes = other.bytes;

        for (int offset = -1; ++offset < len;) {
            int myCharIgnoreCase = AsciiTables.ToLowerCaseTable[bytes[begin + offset] & 0xff] & 0xff;
            int otherCharIgnoreCase = AsciiTables.ToLowerCaseTable[other_bytes[other_begin + offset] & 0xff] & 0xff;
            if (myCharIgnoreCase < otherCharIgnoreCase) {
                return -1;
            } else if (myCharIgnoreCase > otherCharIgnoreCase) {
                return 1;
            }
        }
        return size == other.realSize ? 0 : size == len ? -1 : 1;
    }

    /**
     * Returns the internal byte array. This is unsafe unless you know what you're
     * doing. But it can improve performance for byte-array operations that
     * won't change the array.
     *
     * @return the internal byte array
     */
    public byte[] unsafeBytes() {
        return bytes;
    }

    /**
     * Get a copy of the bytes referenced by this ByteList.  It will make an optimal copy and not
     * carry along unused bytes from COW sharing.
     *
     * @return a copy of the bytes.
     */
    public byte[] bytes() {
        byte[] newBytes = new byte[realSize];
        System.arraycopy(bytes, begin, newBytes, 0, realSize);
        return newBytes;
    }

    /**
     * First index of the backing array that contains data for the ByteList.  Note that we have
     * copy-on-write (COW) semantics which means sharing the same backing store will yield different
     * begin and size values while using the same byte[].
     *
     * @return the index
     */
    public int begin() {
        return begin;
    }

    /**
     * Grow the ByteList by increaseRequested bytes.  A value <0 will be a no-op.
     *
     * @param increaseRequested number of bytes to grow
     */
    private void grow(int increaseRequested) {
        if (increaseRequested < 0) return;

        // new available size
        int newSize = realSize + increaseRequested;

        // only recopy if bytes does not have enough room *after* the begin index
        if (newSize > bytes.length - begin) {
            byte[] newBytes = new byte[newSize + (newSize >> 1)];
            if (bytes.length != 0) System.arraycopy(bytes, begin, newBytes, 0, realSize);
            bytes = newBytes;
            begin = 0;
        }
    }

    /**
     * @see Object#hashCode()
     */
    @Override
    public int hashCode() {
        int hash = this.hash;
        if (hash != 0) return hash;

        hash = 0;
        int begin = this.begin, realSize = this.realSize;
        byte[] bytes = this.bytes;

        int index = begin;
        final int end = begin + realSize;
        while (index < end) {
            // equivalent of: hash = hash * 65599 + byte;
            hash = ((hash << 16) + (hash << 6) - hash) + (int)(bytes[index++]); // & 0xFF ?
        }
        hash = hash + (hash >> 5);
        return this.hash = hash;
    }

    /**
     * Remembers toString value, which is expensive for StringBuffer.
     *
     * @return an ISO-8859-1 representation of the byte list
     */
    @Override
    public String toString() {
        String decoded = this.stringValue;
        if (decoded == null) {
            this.stringValue = decoded = decode(bytes, begin, realSize, "ISO-8859-1");
        }
        return decoded;
    }

    /**
     * Produce a String using the raw (ISO-8859-1) bytes of this ByteList.
     *
     * @return a String based on the raw bytes
     */
    public String toByteString() {
        return new String(bytes, begin, realSize, StandardCharsets.ISO_8859_1);
    }

    /**
     * Create a bytelist with ISO_8859_1 encoding from the provided CharSequence.
     *
     * @param s the source for new ByteList
     * @return the new ByteList
     */
    public static ByteList create(CharSequence s) {
        return new ByteList(plain(s),false);
    }

    /**
     * Create a byte[] from a CharSequence assuming a raw/ISO-8859-1 encoding
     *
     * @param s the CharSequence to convert
     * @return a byte[]
     */
    public static byte[] plain(CharSequence s) {
        if (s instanceof String) return encode(s, "ISO-8859-1");

        // Not a String...get it the slow way
        byte[] bytes = new byte[s.length()];
        for (int i = 0; i < bytes.length; i++) {
            bytes[i] = (byte) s.charAt(i);
        }
        return bytes;
    }

    /**
     * Create a byte[] from a char[] assuming a raw/ISO-8859-1 encoding
     *
     * @param s the CharSequence to convert
     * @return a byte[]
     */
    public static byte[] plain(char[] s) {
        byte[] bytes = new byte[s.length];
        for (int i = 0; i < s.length; i++) {
            bytes[i] = (byte) s[i];
        }
        return bytes;
    }

    /**
     * Create a char[] from a byte[] assuming a raw/ISO-8859-1 encoding
     *
     * @param b the source byte[]
     * @param start index to start converting to char's
     * @param length how many bytes to convert to char's
     * @return a byte[]
     */
    public static char[] plain(byte[] b, int start, int length) {
        assert b != null : "byte array cannot be null";
        assert start >= 0 && start + length <= b.length : "Invalid start or start+length too long";

        char[] chars = new char[length];
        for (int i = 0; i < length; i++) {
            chars[i] = (char) (b[start + i] & 0xFF);
        }
        return chars;
    }

    /**
     * Create a char[] from a byte[] assuming a raw/ISO-8859-1 encoding
     *
     * @param b the source byte[]
     * @return a byte[]
     */
    public static char[] plain(byte[] b) {
        assert b != null : "byte array cannot be null";

        char[] chars = new char[b.length];
        for (int i = 0; i < b.length; i++) {
            chars[i] = (char) (b[i] & 0xFF);
        }
        return chars;
    }

    // Work around bad charset handling in JDK. See
    // http://halfbottle.blogspot.com/2009/07/charset-continued-i-wrote-about.html
    private static final ConcurrentMap charsetsByAlias =
            new ConcurrentHashMap();

    /**
     * Decode byte data into a String with the supplied charsetName.
     *
     * @param data to be decoded
     * @param offset where to start decoding from in data
     * @param length how many bytes to decode from data
     * @param charsetName used to make the resulting String
     * @return the new String
     */
    public static String decode(byte[] data, int offset, int length, String charsetName) {
        return lookup(charsetName).decode(ByteBuffer.wrap(data, offset, length)).toString();
    }

    /**
     * Decode byte data into a String with the supplied charsetName.
     *
     * @param data to be decoded
     * @param charsetName used to make the resulting String
     * @return the new String
     */
    public static String decode(byte[] data, String charsetName) {
        return lookup(charsetName).decode(ByteBuffer.wrap(data)).toString();
    }

    /**
     * Encode CharSequence into a set of bytes based on the charsetName.
     *
     * @param data to be encoded
     * @param charsetName used to extract the resulting bytes
     * @return the new byte[]
     */
    public static byte[] encode(CharSequence data, String charsetName) {
        return lookup(charsetName).encode(CharBuffer.wrap(data)).array();
    }

    private static Charset lookup(String alias) {
        Charset cs = charsetsByAlias.get(alias);
        if (cs == null) {
            cs = Charset.forName(alias);
            charsetsByAlias.putIfAbsent(alias, cs);
        }
        return cs;
    }

    /**
     * Pretend byte array is raw and each byte is also the character value
     *
     * @param ix is the index you want
     * @return
     */
    public char charAt(int ix) {
        return (char)(this.bytes[begin + ix] & 0xFF);
    }

    /**
     * Create subSequence of this array between start and end offsets
     *
     * @param start index for beginning of subsequence
     * @param end index for end of subsequence
     * @return a new ByteList/CharSequence
     */
    public CharSequence subSequence(int start, int end) {
        return new ByteList(this, start, end - start);
    }

    /**
     * Are these two byte arrays similiar (semantics similiar to compareTo).  This is slightly
     * special in that it will only compare the same number of bytes based on the lesser of the two
     * lengths.
     *
     * @return -1, 0, 1
     */
    public static int memcmp(final byte[] first, final int firstStart, final int firstLen, final byte[] second, final int secondStart, final int secondLen) {
        if (first == second) return 0;
        final int len =  Math.min(firstLen,secondLen);
        int offset = -1;
        for (  ; ++offset < len && first[firstStart + offset] == second[secondStart + offset]; ) ;
        if (offset < len) {
            return (first[firstStart + offset]&0xFF) > (second[secondStart + offset]&0xFF) ? 1 : -1;
        }
        return firstLen == secondLen ? 0 : firstLen == len ? -1 : 1;

    }

    /**
     * Are these two byte arrays similiar (semantics similiar to compareTo).
     *
     * @return -1, 0, 1
     */
    public static int memcmp(final byte[] first, final int firstStart, final byte[] second, final int secondStart, int len) {
        int a = firstStart;
        int b = secondStart;
        int tmp;

        for (; len != 0; --len) {
            if ((tmp = first[a++] - second[b++]) != 0) {
                return tmp;
            }
        }
        return 0;
    }


    /**
     * @return the bytes
     */
    public byte[] getUnsafeBytes() {
        return bytes;
    }

    /**
     * @param bytes the bytes to set
     */
    public void setUnsafeBytes(byte[] bytes) {
        assert bytes != null;
        this.bytes = bytes;
        invalidate();
    }

    /**
     * @return the begin
     */
    public int getBegin() {
        return begin;
    }

    /**
     * @param begin the begin to set
     */
    public void setBegin(int begin) {
        assert begin >= 0;
        this.begin = begin;
        invalidate();
    }

    /**
     * @return the realSize
     */
    public int getRealSize() {
        return realSize();
    }

    /**
     * @param realSize the realSize to set
     */
    public void setRealSize(int realSize) {
        realSize(realSize);
    }

    /**
     * @return the realSize
     */
    public int realSize() { return realSize; }

    /**
     * @param realSize the realSize to set
     */
    public void realSize(int realSize) {
        assert realSize >= 0;
        this.realSize = realSize;
        invalidate();
    }

    /**
     * @return the encoding
     */
    public Encoding getEncoding() {
        return encoding;
    }

    /**
     * @param encoding the encoding to set
     */
    public void setEncoding(Encoding encoding) {
        assert encoding != null;
        this.encoding = safeEncoding(encoding);
        invalidate();
    }

    /**
     * Ensure the encoding is always non-null.
     */
    public static Encoding safeEncoding(Encoding incoming) {
        if (incoming == null) return ASCIIEncoding.INSTANCE;
        return incoming;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy