org.python.modules._csv._csv Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of jython-standalone Show documentation
Jython is an implementation of the high-level, dynamic, object-oriented language Python written in 100% Pure Java, and seamlessly integrated with the Java platform. It thus allows you to run Python on any Java platform.
There is a newer version: 2.7.4
Show newest version
/* Copyright (c) Jython Developers */
package org.python.modules._csv;

import org.python.core.ArgParser;
import org.python.core.ClassDictInit;
import org.python.core.Py;
import org.python.core.PyBaseString;
import org.python.core.PyDictionary;
import org.python.core.PyException;
import org.python.core.PyInteger;
import org.python.core.PyObject;
import org.python.core.PyString;
import org.python.core.PyStringMap;

/**
 * The Python _csv module.
 *
 * Provides the low-level underpinnings of a CSV reading/writing module.  Users should not
 * use this module directly, but import the csv.py module instead.
 *
 * <p>The dialect registry and the field size limit live in static fields of this class, so
 * they are shared by every caller rather than being held per PySystemState.
 */
public class _csv implements ClassDictInit {

    /** Module docstring, published as {@code __doc__} by {@link #classDictInit(PyObject)}. */
    public static PyString __doc__ = Py.newString(
        "CSV parsing and writing.\n" +
        "\n" +
        "This module provides classes that assist in the reading and writing\n" +
        "of Comma Separated Value (CSV) files, and implements the interface\n" +
        "described by PEP 305.  Although many CSV files are simple to parse,\n" +
        "the format is not formally defined by a stable specification and\n" +
        "is subtle enough that parsing lines of a CSV file with something\n" +
        "like line.split(\",\") is bound to fail.  The module supports three\n" +
        "basic APIs: reading, writing, and registration of dialects.\n" +
        "\n" +
        "\n" +
        "DIALECT REGISTRATION:\n" +
        "\n" +
        "Readers and writers support a dialect argument, which is a convenient\n" +
        "handle on a group of settings.  When the dialect argument is a string,\n" +
        "it identifies one of the dialects previously registered with the module.\n" +
        "If it is a class or instance, the attributes of the argument are used as\n" +
        "the settings for the reader or writer:\n" +
        "\n" +
        "    class excel:\n" +
        "        delimiter = ','\n" +
        "        quotechar = '\"'\n" +
        "        escapechar = None\n" +
        "        doublequote = True\n" +
        "        skipinitialspace = False\n" +
        // The backslashes are escaped so the docstring shows the two-character escapes
        // \r and \n literally instead of embedding a real CR/LF in the example.
        "        lineterminator = '\\r\\n'\n" +
        "        quoting = QUOTE_MINIMAL\n" +
        "\n" +
        "SETTINGS:\n" +
        "\n" +
        "    * quotechar - specifies a one-character string to use as the \n" +
        "        quoting character.  It defaults to '\"'.\n" +
        "    * delimiter - specifies a one-character string to use as the \n" +
        "        field separator.  It defaults to ','.\n" +
        "    * skipinitialspace - specifies how to interpret whitespace which\n" +
        "        immediately follows a delimiter.  It defaults to False, which\n" +
        "        means that whitespace immediately following a delimiter is part\n" +
        "        of the following field.\n" +
        "    * lineterminator -  specifies the character sequence which should \n" +
        "        terminate rows.\n" +
        "    * quoting - controls when quotes should be generated by the writer.\n" +
        "        It can take on any of the following module constants:\n" +
        "\n" +
        "        csv.QUOTE_MINIMAL means only when required, for example, when a\n" +
        "            field contains either the quotechar or the delimiter\n" +
        "        csv.QUOTE_ALL means that quotes are always placed around fields.\n" +
        "        csv.QUOTE_NONNUMERIC means that quotes are always placed around\n" +
        "            fields which do not parse as integers or floating point\n" +
        "            numbers.\n" +
        "        csv.QUOTE_NONE means that quotes are never placed around fields.\n" +
        "    * escapechar - specifies a one-character string used to escape \n" +
        "        the delimiter when quoting is set to QUOTE_NONE.\n" +
        "    * doublequote - controls the handling of quotes inside fields.  When\n" +
        "        True, two consecutive quotes are interpreted as one during read,\n" +
        "        and when writing, each quote character embedded in the data is\n" +
        "        written as two quotes\n" +
        "\n");

    // XXX: should be per PySystemState
    /** Dialect registry, mapping a dialect name to the dialect registered under it. */
    public static PyDictionary _dialects = new PyDictionary();

    // XXX: should be per PySystemState
    /** Max parsed field size; initially 128 * 1024. */
    public static volatile int field_limit = 128 * 1024;

    /** _csv.Error exception. */
    public static final PyObject Error = Py.makeClass("Error", Py.Exception, exceptionNamespace());

    /**
     * Build a {@code _csv.Error} carrying the given message.
     *
     * @param message text of the error
     * @return a new exception of type {@link #Error}, ready to be thrown by the caller
     */
    public static PyException Error(String message) {
        return new PyException(Error, message);
    }

    /** Module version. */
    public static PyString __version__ = new PyString("1.0");

    /**
     * Populate the module dictionary: the module name and docstring, the Dialect type, the
     * Error class, and one integer constant per {@code QuoteStyle} value.
     *
     * @param dict the dictionary being initialised for this module
     */
    public static void classDictInit(PyObject dict) {
        dict.__setitem__("__name__", Py.newString("_csv"));
        dict.__setitem__("__doc__", __doc__);
        dict.__setitem__("Dialect", PyDialect.TYPE);
        dict.__setitem__("Error", Error);

        // Each quoting constant is exported under its enum name with its ordinal as value,
        // so the declaration order of QuoteStyle determines the numeric constants.
        for (QuoteStyle style : QuoteStyle.values()) {
            dict.__setitem__(style.name(), Py.newInteger(style.ordinal()));
        }

        // NOTE(review): storing null here presumably keeps these Java members from being
        // exposed as module attributes -- confirm against the ClassDictInit contract.
        dict.__setitem__("classDictInit", null);
        dict.__setitem__("field_limit", null);
    }

    /**
     * Register a dialect under a name.
     *
     * <p>Positional arguments are the name and, optionally, a dialect; keyword arguments
     * are forwarded to the Dialect constructor.
     *
     * @param args positional values followed by the keyword values
     * @param keywords names of the trailing keyword values in {@code args}
     * @throws PyException TypeError if more than two positional arguments are given or the
     *         name is not a {@code PyBaseString}
     */
    public static void register_dialect(PyObject[] args, String[] keywords) {
        int argc = args.length - keywords.length;
        if (argc > 2) {
            throw Py.TypeError("register_dialect() expected at most 2 arguments, got " + argc);
        }

        ArgParser ap = parseArgs("register_dialect", args, keywords);
        PyObject name = ap.getPyObject(0);
        PyObject dialect = ap.getPyObject(1, null);

        if (!(name instanceof PyBaseString)) {
            throw Py.TypeError("dialect name must be a string or unicode");
        }

        _dialects.__setitem__(name, dialectFromKwargs(dialect, args, keywords));
    }

    /**
     * Remove a dialect from the registry.
     *
     * @param name the registered name to remove
     * @throws PyException {@code _csv.Error} if no dialect is registered under {@code name}
     */
    public static void unregister_dialect(PyObject name) {
        if (!_dialects.has_key(name)) {
            throw Error("unknown dialect");
        }
        _dialects.__delitem__(name);
    }

    /**
     * Look up a registered dialect.
     *
     * @param name the registered name
     * @return the dialect stored under {@code name}
     * @throws PyException {@code _csv.Error} if no dialect is registered under {@code name}
     */
    public static PyObject get_dialect(PyObject name) {
        return get_dialect_from_registry(name);
    }

    /**
     * @return the keys of the dialect registry, i.e. the registered dialect names
     */
    public static PyObject list_dialects() {
        return _dialects.keys();
    }

    /**
     * Create a CSV reader.
     *
     * <p>The first positional argument is iterated over for input; the optional second is a
     * dialect. Keyword arguments are forwarded to the Dialect constructor.
     *
     * @param args positional values followed by the keyword values
     * @param keywords names of the trailing keyword values in {@code args}
     * @return a new {@code PyReader} over the iterator and the resolved dialect
     */
    public static PyObject reader(PyObject[] args, String[] keywords) {
        ArgParser ap = parseArgs("reader", args, keywords);
        PyObject iterator = Py.iter(ap.getPyObject(0), "argument 1 must be an iterator");
        PyObject dialect = ap.getPyObject(1, null);
        return new PyReader(iterator, dialectFromKwargs(dialect, args, keywords));
    }

    /**
     * Create a CSV writer.
     *
     * <p>The first positional argument must expose a callable {@code write} attribute; the
     * optional second is a dialect. Keyword arguments are forwarded to the Dialect
     * constructor.
     *
     * @param args positional values followed by the keyword values
     * @param keywords names of the trailing keyword values in {@code args}
     * @return a new {@code PyWriter} bound to the target's {@code write} attribute
     * @throws PyException TypeError if the target has no callable {@code write} attribute
     */
    public static PyObject writer(PyObject[] args, String[] keywords) {
        ArgParser ap = parseArgs("writer", args, keywords);
        PyObject outputFile = ap.getPyObject(0);
        PyObject dialect = ap.getPyObject(1, null);

        PyObject writeline = outputFile.__findattr__("write");
        if (writeline == null || !writeline.isCallable()) {
            throw Py.TypeError("argument 1 must have a \"write\" method");
        }
        return new PyWriter(writeline, dialectFromKwargs(dialect, args, keywords));
    }

    /**
     * @return the current maximum parsed field size
     */
    public static PyInteger field_size_limit() {
        return Py.newInteger(field_limit);
    }

    /**
     * Set a new maximum parsed field size.
     *
     * @param new_limit the new limit; must be a {@code PyInteger}
     * @return the limit that was in effect before this call
     * @throws PyException TypeError if {@code new_limit} is not a {@code PyInteger}
     */
    public static PyInteger field_size_limit(PyObject new_limit) {
        if (!(new_limit instanceof PyInteger)) {
            throw Py.TypeError("limit must be an integer");
        }
        int old_limit = field_limit;
        field_limit = new_limit.asInt();
        return Py.newInteger(old_limit);
    }

    /**
     * Fetch a dialect from the registry, failing if it is absent.
     *
     * @param name the registered name
     * @return the dialect stored under {@code name}
     * @throws PyException {@code _csv.Error} if no dialect is registered under {@code name}
     */
    static PyObject get_dialect_from_registry(PyObject name) {
        PyObject dialect = _dialects.__finditem__(name);
        if (dialect == null) {
            throw Error("unknown dialect");
        }
        return dialect;
    }

    /**
     * Return an ArgParser that ignores keyword args.
     *
     * <p>Keyword values occupy the tail of {@code args}; they are cut off so that the parser
     * only sees the leading positional values.
     */
    private static ArgParser parseArgs(String funcName, PyObject[] args, String[] keywords) {
        // XXX: _weakref.ReferenceType has the same code
        if (keywords.length > 0) {
            int argc = args.length - keywords.length;
            PyObject[] justArgs = new PyObject[argc];
            System.arraycopy(args, 0, justArgs, 0, argc);
            args = justArgs;
        }
        return new ArgParser(funcName, args, Py.NoKeywords, Py.NoKeywords);
    }

    /**
     * Return a Dialect instance created or updated from keyword arguments.
     *
     * <p>NOTE: when a {@code dialect} keyword is present, a positionally supplied dialect is
     * not forwarded at all; only the keyword values reach the Dialect constructor.
     *
     * @param dialect the positional dialect argument, or {@code null} if none was given
     * @param args positional values followed by the keyword values
     * @param keywords names of the trailing keyword values in {@code args}
     */
    private static PyDialect dialectFromKwargs(PyObject dialect, PyObject[] args,
                                               String[] keywords) {
        PyObject[] dialectArgs;
        int argc = args.length - keywords.length;

        // was a dialect keyword specified?
        boolean dialectKeyword = false;
        for (String keyword : keywords) {
            if (keyword.equals("dialect")) {
                dialectKeyword = true;
                break;
            }
        }

        if (dialect == null || dialectKeyword) {
            // dialect wasn't passed as a positional arg
            dialectArgs = new PyObject[keywords.length];
            System.arraycopy(args, argc, dialectArgs, 0, keywords.length);
        } else {
            // have dialect -- pass it to dialect_new as a positional arg
            dialectArgs = new PyObject[1 + keywords.length];
            dialectArgs[0] = dialect;
            System.arraycopy(args, argc, dialectArgs, 1, keywords.length);
        }
        return (PyDialect)PyDialect.TYPE.__call__(dialectArgs, keywords);
    }

    /**
     * Build the class namespace for {@link #Error}: a map whose only entry sets
     * {@code __module__} to {@code "_csv"}.
     */
    private static PyObject exceptionNamespace() {
        PyObject dict = new PyStringMap();
        dict.__setitem__("__module__", new PyString("_csv"));
        return dict;
    }
}