org.python.core.Py2kBuffer Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of jython-slim Show documentation
Jython is an implementation of the high-level, dynamic, object-oriented language Python written in 100% Pure Java, and seamlessly integrated with the Java platform. It thus allows you to run Python on any Java platform.
The newest version!
// Copyright (c) 2013 Jython Developers
package org.python.core;

import org.python.core.buffer.BaseBuffer;
import org.python.core.buffer.SimpleStringBuffer;
import org.python.core.util.StringUtil;
import org.python.expose.ExposedMethod;
import org.python.expose.ExposedNew;
import org.python.expose.ExposedType;
import org.python.expose.MethodType;

/**
 * Implementation of the Python buffer type. buffer is being superseded in
 * Python 2.7 by memoryview, and is provided here to support legacy Python code. Use
 * memoryview if you can.
 * 
 * buffer and memoryview both wrap the same Jython buffer API:
 * that designed for memoryview. In CPython, a new C API (which Jython's resembles) was
 * introduced with memoryview. Because of this, buffer and
 * memoryview may be supplied as arguments in the same places, and will accept as
 * arguments the same (one-dimensional byte-array) types. Their behaviour differs as detailed in the
 * documentation.
 */
@Untraversable
@ExposedType(name = "buffer", doc = BuiltinDocs.buffer_doc, base = PyObject.class,
        isBaseType = false)
public class Py2kBuffer extends PySequence implements BufferProtocol {

    public static final PyType TYPE = PyType.fromClass(Py2kBuffer.class);

    /** The underlying object on which the buffer was created. */
    private final BufferProtocol object;
    /** The offset (in bytes) into the offered object at which the buffer starts. */
    private final int offset;
    /** Number of bytes to include in the buffer (or -1 for all available). */
    private final int size;

    /**
     * Construct a Py2kBuffer from an object supporting the {@link BufferProtocol}. The
     * buffer takes no lease on the PyBuffer at present, but for each
     * action performed obtains a new one and releases it. (Major difference from
     * memoryview.) Note that when size=-1 is given, the buffer reflects
     * the changing size of the underlying object.
     *
     * @param object the object on which this is to be a buffer.
     * @param offset into the array exposed by the object (0 for start).
     * @param size of the slice or -1 for all of the object.
     */
    public Py2kBuffer(BufferProtocol object, int offset, int size) {
        super(TYPE);

        if (object instanceof Py2kBuffer) {
            // Special behaviour when the source object is another of our kind.
            Py2kBuffer source = (Py2kBuffer)object;
            offset = source.offset + offset;
            if (source.size >= 0) {
                // The source imposes a size limit, or rather it imposes an end
                int end = source.offset + source.size;
                if (size < 0 || offset + size > end) {
                    // We are asked for unlimited/excessive length, but must impose source end.
                    size = end - offset;
                }
            }
            // This will be a Py2kBuffer with the derived offset and size on the same object.
            object = source.object;
        }
        this.object = object;
        this.offset = offset;
        this.size = size;
    }

    /**
     * Every action on the buffer must obtain a new {@link PyBuffer} reflecting (this
     * buffer's slice of) the contents of the backing object.
     *
     * @return a PyBuffer onto the specified slice.
     */
    private PyBuffer getBuffer() {
        /*
         * Ask for a simple one-dimensional byte view from the object, as we cannot deal with more
         * complex navigation. Ask for read access. If the object is writable, the PyBuffer will be
         * writable, but we won't write to it.
         */
        final int flags = PyBUF.SIMPLE;
        PyBuffer buf = object.getBuffer(flags);

        // This may already be what we need, or this buffer may be a sub-range of the object
        if (offset > 0 || size >= 0) {
            /*
             * It's a sub-range so we have to construct a slice buffer on the first buffer. Take
             * care that the bounds of the slice are within the object, which may have changed size
             * since the buffer was created.
             */
            PyBuffer first = buf;
            int start = offset;
            int length = first.getLen() - start;

            if (length <= 0) {
                // Range now lies outside object: zero length slice
                start = length = 0;
            } else if (size >= 0 && size < length) {
                // A size less than the available bytes was specified (size==-1 => all of them)
                length = size;
            }

            // Now offset and length specify a feasible slice
            buf = first.getBufferSlice(flags, offset, length);

            // We should release our first lease because the slice buf holds one.
            // That lease will be released when buf is released.
            first.release();
        }
        return buf;
    }

    /**
     * Return a {@link PyObject} bearing the interface {@link BufferProtocol} and equivalent to the
     * argument, or return null. This is a helper function to those methods that accept
     * a range of types supporting the buffer API. Normally the return is exactly the argument,
     * except in the case of a {@link PyUnicode}, which will be converted to a {@link PyString}
     * according to Py2k semantics, equivalent to a UTF16BE encoding to bytes (for Py2k
     * compatibility).
     *
     * @param obj the object to access.
     * @return PyObject supporting {@link BufferProtocol}, if not null.
     */
    private static BufferProtocol asBufferableOrNull(PyObject obj) {

        if (obj instanceof PyUnicode) {
            /*
             * Jython unicode does not support the buffer protocol (so that you can't take a
             * memoryview of one). But to be compatible with CPython we allow buffer(unicode) to
             * export two-byte UTF-16. Fortunately, a buffer is read-only, so we can use a copy.
             */
            String bytes = codecs.encode((PyString)obj, "UTF-16BE", "replace");
            return new PyString(bytes);

        } else if (obj instanceof BufferProtocol) {
            // That will do directly
            return (BufferProtocol)obj;

        } else {
            // We don't know how to give this value the buffer API.
            return null;
        }
    }

    /** Names of arguments in the constructor (for ArgParser). */
    private static String[] paramNames = {"object", "offset", "size"};

    @ExposedNew
    static PyObject buffer_new(PyNewWrapper new_, boolean init, PyType subtype, PyObject[] args,
            String[] keywords) {

        // Use the ArgParser to access the arguments
        ArgParser ap = new ArgParser("buffer", args, keywords, paramNames, 1);
        PyObject obj = ap.getPyObject(0);
        int offset = ap.getIndex(1, 0);
        int size = ap.getInt(2, -1);

        // Get the object as a BufferProtocol if possible
        BufferProtocol object = asBufferableOrNull(obj);

        // Checks
        if (object == null) {
            throw Py.TypeError("buffer object expected (or unicode)");
        } else if (offset < 0) {
            throw Py.ValueError("offset must be zero or positive");
        } else if (size < -1) {
            throw Py.ValueError("size must be zero or positive");
        } else {
            // Checks ok
            return new Py2kBuffer(object, offset, size);
        }
    }

    @Override
    public int __len__() {
        PyBuffer buf = getBuffer();
        try {
            return buf.getLen();
        } finally {
            buf.release();
        }
    }

    @Override
    public PyString __repr__() {
        return buffer___repr__();
    }

    @ExposedMethod(doc = BuiltinDocs.buffer___repr___doc)
    final PyString buffer___repr__() {
        String fmt = "";
        String ret = String.format(fmt, Py.idstr((PyObject) object), size, offset, Py.idstr(this));
        return new PyString(ret);
    }

    @Override
    public PyString __str__() {
        return buffer___str__();
    }

    @ExposedMethod(doc = BuiltinDocs.buffer___str___doc)
    final PyString buffer___str__() {
        return new PyString(toString());
    }

    @Override
    public String toString() {
        try (PyBuffer buf = getBuffer()) {
            return buf.toString();
        }
    }

    /**
     * {@inheritDoc} A buffer implements this as concatenation and returns a
     * str ({@link PyString}) result.
     */
    @Override
    public PyObject __add__(PyObject other) {
        return buffer___add__(other);
    }

    @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.buffer___add___doc)
    final PyObject buffer___add__(PyObject other) {

        // The other operand must offer us the buffer interface
        BufferProtocol bp = asBufferableOrNull(other);

        if (bp == null) {
            // Allow PyObject._basic_add to pick up the pieces or raise informative error
            return null;
        } else {
            // PyBuffer on the underlying object of this buffer
            PyBuffer buf = getBuffer();
            try {
                // And on the other operand (ask for simple 1D-bytes).
                PyBuffer otherBuf = bp.getBuffer(PyBUF.SIMPLE);
                try {
                    // Concatenate the buffers as strings
                    return new PyString(buf.toString().concat(otherBuf.toString()));
                } finally {
                    // Must always let go of the buffer
                    otherBuf.release();
                }
            } finally {
                // Must always let go of the buffer
                buf.release();
            }
        }
    }

    /**
     * {@inheritDoc} On a buffer it returns a str containing the buffer
     * contents n times.
     */
    @Override
    public PyObject __mul__(PyObject o) {
        return buffer___mul__(o);
    }

    @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.buffer___mul___doc)
    final PyObject buffer___mul__(PyObject o) {
        if (!o.isIndex()) {
            return null;
        }
        return repeat(o.asIndex(Py.OverflowError));
    }

    /**
     * {@inheritDoc} On a buffer it returns a str containing the buffer
     * contents n times.
     */
    @Override
    public PyObject __rmul__(PyObject o) {
        return buffer___rmul__(o);
    }

    @ExposedMethod(type = MethodType.BINARY, doc = BuiltinDocs.buffer___rmul___doc)
    final PyObject buffer___rmul__(PyObject o) {
        if (!o.isIndex()) {
            return null;
        }
        return repeat(o.asIndex(Py.OverflowError));
    }

    /*
     * ============================================================================================
     * Python API comparison operations
     * ============================================================================================
     */

    /**
     * Comparison function between two buffers of bytes, returning 1, 0 or -1 as a>b,
     * a==b, or a<b respectively. The comparison is by value, using Python unsigned byte
     * conventions, left-to-right (low to high index). Zero bytes are significant, even at the end
     * of the array: [65,66,67]<"ABC\u0000", for example and [] is less
     * than every non-empty b, while []=="".
     *
     * @param a left-hand wrapped array in the comparison
     * @param b right-hand wrapped object in the comparison
     * @return 1, 0 or -1 as a>b, a==b, or a<b respectively
     */
    private static int compare(PyBuffer a, PyBuffer b) {

        // Compare elements one by one in these ranges:
        int ap = 0;
        int aEnd = ap + a.getLen();
        int bp = 0;
        int bEnd = b.getLen();

        while (ap < aEnd) {
            if (bp >= bEnd) {
                // a is longer than b
                return 1;
            } else {
                // Compare the corresponding bytes
                int aVal = a.intAt(ap++);
                int bVal = b.intAt(bp++);
                int diff = aVal - bVal;
                if (diff != 0) {
                    return (diff < 0) ? -1 : 1;
                }
            }
        }

        // All the bytes matched and we reached the end of a
        if (bp < bEnd) {
            // But we didn't reach the end of b
            return -1;
        } else {
            // And the end of b at the same time, so they're equal
            return 0;
        }
    }

    /**
     * Comparison function between this buffer and any other object. The inequality
     * comparison operators are based on this.
     *
     * @param b
     * @return 1, 0 or -1 as this>b, this==b, or this<b respectively, or -2 if the comparison is
     *         not implemented
     */
    private int buffer_cmp(PyObject b) {

        // Check the memeoryview is still alive: works here for all the inequalities
        PyBuffer buf = getBuffer();
        try {

            // Try to get a byte-oriented view
            PyBuffer bv = BaseBytes.getView(b);

            if (bv == null) {
                // Signifies a type mis-match. See PyObject._cmp_unsafe() and related code.
                return -2;

            } else {

                try {
                    if (bv == buf) {
                        // Same buffer: quick result
                        return 0;
                    } else {
                        // Actually compare the contents
                        return compare(buf, bv);
                    }

                } finally {
                    // Must always let go of the buffer
                    bv.release();
                }
            }
        } finally {
            buf.release();
        }

    }

    /**
     * Fail-fast comparison function between byte array types and any other object, for when the
     * test is only for equality. The inequality comparison operators __eq__ and
     * __ne__ are based on this.
     *
     * @param b
     * @return 0 if this==b, or +1 or -1 if this!=b, or -2 if the comparison is not implemented
     */
    private int buffer_cmpeq(PyObject b) {

        // Get a view on the underlying object
        PyBuffer buf = getBuffer();
        try {

            // Try to get a byte-oriented view
            PyBuffer bv = BaseBytes.getView(b);

            if (bv == null) {
                // Signifies a type mis-match. See PyObject._cmp_unsafe() and related code.
                return -2;

            } else {

                try {
                    if (bv == buf) {
                        // Same buffer: quick result
                        return 0;
                    } else if (bv.getLen() != buf.getLen()) {
                        // Different size: can't be equal, and we don't care which is bigger
                        return 1;
                    } else {
                        // Actually compare the contents
                        return compare(buf, bv);
                    }

                } finally {
                    // Must always let go of the buffer
                    bv.release();
                }
            }
        } finally {
            buf.release();
        }

    }

    /*
     * These strings are adapted from the patch in CPython issue 15855 and the on-line documentation
     * most attributes do not come with any docstrings in CPython 2.7, so the make_pydocs trick
     * won't work. This is a complete set, although not all are needed in Python 2.7.
     */
    private final static String tobytes_doc = "M.tobytes() -> bytes\n\n"
            + "Return the data in the buffer as a bytestring (an object of class str).";

    private final static String tolist_doc = "M.tolist() -> list\n\n"
            + "Return the data in the buffer as a list of elements.";

    /*
     * ============================================================================================
     * Support for the Buffer API
     * ============================================================================================
     */

    /**
     * {@inheritDoc}
     * 

     * The {@link PyBuffer} returned from this method is provided directly by the underlying object
     * on which this buffer was constructed, taking account of the slicing arguments (offset and
     * size), if these were given when the buffer was constructed.
     */
    @Override
    public PyBuffer getBuffer(int flags) {

        // Get a simple buffer meeting the specification of tha caller
        PyBuffer buf = object.getBuffer(flags);

        // This may already be what we need, or this buffer may be a sub-range of the object
        if (offset > 0 || size >= 0) {
            /*
             * It's a sub-range so we have to construct a slice buffer on the first buffer. Take
             * care that the bounds of the slice are within the object, which may have changed size
             * since the buffer was created.
             */
            PyBuffer first = buf;
            int start = offset;
            int length = first.getLen() - start;

            if (length <= 0) {
                // Range now lies outside object: zero length slice
                start = length = 0;
            } else if (size >= 0 && size < length) {
                // A size less than the available bytes was specified (size==-1 => all of them)
                length = size;
            }

            // Now offset and length specify a feasible slice
            buf = first.getBufferSlice(flags, offset, length);

            // We should release our first lease because the slice buf holds one.
            // That lease will be released when buf is released.
            first.release();
        }
        return buf;
    }

    /*
     * ============================================================================================
     * API for org.python.core.PySequence
     * ============================================================================================
     */
    /**
     * Gets the indexed element of the buffer as a one byte string. This is an
     * extension point called by PySequence in its implementation of {@link #__getitem__}. It is
     * guaranteed by PySequence that the index is within the bounds of the buffer.
     *
     * @param index index of the element to get.
     * @return one-character string formed from the byte at the index
     */
    @Override
    protected PyString pyget(int index) {
        // Our chance to check the buffer is still alive
        PyBuffer buf = getBuffer();
        try {
            // Treat the byte at the index as a character code
            return new PyString(String.valueOf((char)buf.intAt(index)));
        } finally {
            buf.release();
        }
    }

    /**
     * Returns a slice of elements from this sequence as a PyString.
     *
     * @param start the position of the first element.
     * @param stop one more than the position of the last element.
     * @param step the step size.
     * @return a PyString corresponding the the given range of elements.
     */
    @Override
    protected synchronized PyString getslice(int start, int stop, int step) {
        // Our chance to check the buffer is still alive
        PyBuffer buf = getBuffer();
        try {
            int n = sliceLength(start, stop, step);
            PyBuffer first = buf;
            buf = first.getBufferSlice(PyBUF.FULL_RO, start, n, step);
            first.release(); // We've finished (buf holds a lease)
            PyString ret = Py.newString(buf.toString());
            return ret;
        } finally {
            buf.release();
        }
    }

    /**
     * buffer*int represents repetition in Python, and returns a str (
     * bytes) object.
     *
     * @param count the number of times to repeat this.
     * @return a PyString repeating this buffer (as a str) that many times
     */
    @Override
    protected synchronized PyString repeat(int count) {
        PyBuffer buf = getBuffer();
        try {
            PyString ret = Py.newString(buf.toString());
            return (PyString)ret.repeat(count);
        } finally {
            buf.release();
        }
    }

    /**
     * Sets the indexed element of the buffer to the given value, treating the
     * operation as assignment to a slice of length one. This is different from the same operation
     * on a byte array, where the assigned value must be an int: here it must have the buffer API
     * and length one. This is an extension point called by PySequence in its implementation of
     * {@link #__setitem__} It is guaranteed by PySequence that the index is within the bounds of
     * the buffer. Any other clients calling pyset(int, PyObject) must make
     * the same guarantee.
     *
     * @param index index of the element to set.
     * @param value to set this element to, regarded as a buffer of length one unit.
     * @throws PyException {@code AttributeError} if value cannot be converted to an integer
     * @throws PyException {@code ValueError} if value<0 or value%gt;255
     */
    @Override
    public synchronized void pyset(int index, PyObject value) throws PyException {
        // Our chance to check the buffer is still alive
        PyBuffer buf = getBuffer();
        try {

            // Get a buffer API on the value being assigned
            PyBuffer valueBuf = BaseBytes.getViewOrError(value);
            try {
                if (valueBuf.getLen() != 1) {
                    // CPython 2.7 message
                    throw Py.ValueError("cannot modify size of buffer object");
                }
                buf.storeAt(valueBuf.byteAt(0), index);
            } finally {
                valueBuf.release();
            }
        } finally {
            buf.release();
        }
    }

    /**
     * Sets the given range of elements according to Python slice assignment semantics. If the step
     * size is one, it is a simple slice and the operation is equivalent to replacing that slice,
     * with the value, accessing the value via the buffer protocol.
     *
     * 
     * a = bytearray(b'abcdefghijklmnopqrst')
     * m = buffer(a)
     * m[2:7] = "ABCDE"
     * 
     *
     * Results in a=bytearray(b'abABCDEhijklmnopqrst').
     * 
     * If the step size is one, but stop-start does not match the length of the right-hand-side a
     * ValueError is thrown.
     * 

     * If the step size is not one, and start!=stop, the slice defines a certain number of elements
     * to be replaced. This function is not available in Python 2.7 (but it is in Python 3.3).
     * 

     *
     * 
     * a = bytearray(b'abcdefghijklmnopqrst')
     * a[2:12:2] = iter( [65, 66, 67, long(68), "E"] )
     * 
     *
     * Results in a=bytearray(b'abAdBfChDjElmnopqrst') in Python 3.3.
     *
     * @param start the position of the first element.
     * @param stop one more than the position of the last element.
     * @param step the step size.
     * @param value an object consistent with the slice assignment
     */
    @Override
    protected synchronized void setslice(int start, int stop, int step, PyObject value) {
        // Our chance to check the buffer is still alive
        PyBuffer buf = getBuffer();
        try {

            if (step == 1 && stop < start) {
                // Because "b[5:2] = v" means insert v just before 5 not 2.
                // ... although "b[5:2:-1] = v means b[5]=v[0], b[4]=v[1], b[3]=v[2]
                stop = start;
            }

            // Get a buffer API on the value being assigned
            PyBuffer valueBuf = BaseBytes.getViewOrError(value);

            // We'll also regard the assigned slice as a buffer.
            PyBuffer bufSlice = null;

            try {
                // How many destination items? Has to match size of value.
                int n = sliceLength(start, stop, step);
                if (n != valueBuf.getLen()) {
                    // CPython 2.7 message
                    throw Py.ValueError("cannot modify size of buffer object");
                }

                /*
                 * In the next section, we get a sliced view of the buf and write the value to it.
                 * The approach to errors is unusual for compatibility with CPython. We pretend we
                 * will not need a WRITABLE buffer in order to avoid throwing a BufferError. This
                 * does not stop the returned object being writable, simply avoids the check. If in
                 * fact it is read-only, then trying to write raises TypeError.
                 */

                bufSlice = buf.getBufferSlice(PyBUF.FULL_RO, start, n, step);
                bufSlice.copyFrom(valueBuf);

            } finally {

                // Release the buffers we obtained (if we did)
                if (bufSlice != null) {
                    bufSlice.release();
                }
                valueBuf.release();
            }
        } finally {
            buf.release();
        }
    }

}