org.python.modules._codecs Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of jython
Jython is an implementation of the high-level, dynamic, object-oriented language Python written in 100% Pure Java, and seamlessly integrated with the Java platform. It thus allows you to run Python on any Java platform.
There is a newer version: 2.7.4
Show newest version
/*
 * Copyright 2000 Finn Bock
 *
 * This program contains material copyrighted by:
 * Copyright (c) Corporation for National Research Initiatives.
 * Originally written by Marc-Andre Lemburg ([email protected]).
 */

package org.python.modules;

import org.python.core.Py;
import org.python.core.PyInteger;
import org.python.core.PyObject;
import org.python.core.PyString;
import org.python.core.PyTuple;
import org.python.core.PyUnicode;
import org.python.core.codecs;

public class _codecs {

    public static void register(PyObject search_function) {
        codecs.register(search_function);
    }


    public static PyTuple lookup(String encoding) {
        return codecs.lookup(encoding);
    }

    private static PyTuple decode_tuple(String s, int len) {
        return new PyTuple(new PyObject[] {
            new PyUnicode(s),
            Py.newInteger(len)
        });
    }

    private static PyTuple encode_tuple(String s, int len) {
        return new PyTuple(new PyObject[] {
            Py.java2py(s),
            Py.newInteger(len)
        });
    }


    /* --- UTF-8 Codec --------------------------------------------------- */

    public static PyTuple utf_8_decode(String str) {
        return utf_8_decode(str, null);
    }

    public static PyTuple utf_8_decode(String str, String errors) {
        int size = str.length();
        return decode_tuple(codecs.PyUnicode_DecodeUTF8(str, errors), size);
    }


    public static PyTuple utf_8_encode(String str) {
        return utf_8_encode(str, null);
    }

    public static PyTuple utf_8_encode(String str, String errors) {
        int size = str.length();
        return encode_tuple(codecs.PyUnicode_EncodeUTF8(str, errors), size);
    }



    /* --- Character Mapping Codec --------------------------------------- */

    public static PyTuple charmap_decode(String str, String errors,
                                         PyObject mapping) {
        int size = str.length();
        StringBuffer v = new StringBuffer(size);

        for (int i = 0; i < size; i++) {
            char ch = str.charAt(i);
            if (ch > 0xFF) {
                codecs.decoding_error("charmap", v, errors,
                                      "ordinal not in range(255)");
                i++;
                continue;
            }

            PyObject w = Py.newInteger(ch);
            PyObject x = mapping.__finditem__(w);
            if (x == null) {
                /* No mapping found: default to Latin-1 mapping if possible */
                v.append(ch);
                continue;
            }

            /* Apply mapping */
            if (x instanceof PyInteger) {
                int value = ((PyInteger) x).getValue();
                if (value < 0 || value > 65535)
                    throw Py.TypeError(
                             "character mapping must be in range(65535)");
                v.append((char) value);
            } else if (x == Py.None) {
                codecs.decoding_error("charmap", v,  errors,
                                      "character maps to ");
            } else if (x instanceof PyString) {
                v.append(x.toString());
            }
            else {
                /* wrong return value */
                throw Py.TypeError("character mapping must return integer, " +
                                   "None or unicode");
            }
        }
        return decode_tuple(v.toString(), size);
    }





    public static PyTuple charmap_encode(String str, String errors,
                                         PyObject mapping) {
        int size = str.length();
        StringBuffer v = new StringBuffer(size);

        for (int i = 0; i < size; i++) {
            char ch = str.charAt(i);
            PyObject w = Py.newInteger(ch);
            PyObject x = mapping.__finditem__(w);
            if (x == null) {
                /* No mapping found: default to Latin-1 mapping if possible */
                if (ch < 256)
                    v.append(ch);
                else
                    codecs.encoding_error("charmap", v, errors,
                                          "missing character mapping");
                continue;
            }
            if (x instanceof PyInteger) {
                int value = ((PyInteger) x).getValue();
                if (value < 0 || value > 255)
                    throw Py.TypeError(
                            "character mapping must be in range(256)");
                v.append((char) value);
            } else if (x == Py.None) {
                codecs.encoding_error("charmap", v,  errors,
                                      "character maps to ");
            } else if (x instanceof PyString) {
                v.append(x.toString());
            }
            else {
                /* wrong return value */
                throw Py.TypeError("character mapping must return " +
                                   "integer, None or unicode");
            }
        }
        return encode_tuple(v.toString(), size);
    }



    /* --- 7-bit ASCII Codec -------------------------------------------- */

    public static PyTuple ascii_decode(String str) {
        return ascii_decode(str, null);
    }

    public static PyTuple ascii_decode(String str, String errors) {
        int size = str.length();
        return decode_tuple(codecs.PyUnicode_DecodeASCII(str, size, errors),
                                                                        size);
    }


    public static PyTuple ascii_encode(String str) {
        return ascii_encode(str, null);
    }

    public static PyTuple ascii_encode(String str, String errors) {
        int size = str.length();
        return encode_tuple(codecs.PyUnicode_EncodeASCII(str, size, errors),
                                                                        size);
    }


    /* --- Latin-1 Codec -------------------------------------------- */

    public static PyTuple latin_1_decode(String str) {
        return latin_1_decode(str, null);
    }

    public static PyTuple latin_1_decode(String str, String errors) {
        int size = str.length();
        StringBuffer v = new StringBuffer(size);

        for (int i = 0; i < size; i++) {
            char ch = str.charAt(i);
            if (ch < 256) {
                v.append(ch);
            } else {
                codecs.decoding_error("latin-1", v, errors,
                                      "ordinal not in range(256)");
                i++;
                continue;
            }
        }

        return decode_tuple(v.toString(), size);
    }


    public static PyTuple latin_1_encode(String str) {
        return latin_1_encode(str, null);
    }

    public static PyTuple latin_1_encode(String str, String errors) {
        int size = str.length();
        StringBuffer v = new StringBuffer(size);

        for (int i = 0; i < size; i++) {
            char ch = str.charAt(i);
            if (ch >= 256) {
                codecs.encoding_error("latin-1", v, errors,
                                      "ordinal not in range(256)");
            } else
                v.append(ch);
        }
        return encode_tuple(v.toString(), size);
    }


    /* --- UTF16 Codec -------------------------------------------- */


    public static PyTuple utf_16_encode(String str) {
        return utf_16_encode(str, null);
    }

    public static PyTuple utf_16_encode(String str, String errors) {
        return encode_tuple(encode_UTF16(str, errors, 0), str.length());
    }

    public static PyTuple utf_16_encode(String str, String errors,
                                       int byteorder) {
        return encode_tuple(encode_UTF16(str, errors, byteorder),
                           str.length());
    }

    public static PyTuple utf_16_le_encode(String str) {
        return utf_16_le_encode(str, null);
    }

    public static PyTuple utf_16_le_encode(String str, String errors) {
        return encode_tuple(encode_UTF16(str, errors, -1), str.length());
    }

    public static PyTuple utf_16_be_encode(String str) {
        return utf_16_be_encode(str, null);
    }

    public static PyTuple utf_16_be_encode(String str, String errors) {
        return encode_tuple(encode_UTF16(str, errors, 1), str.length());
    }


    private static String encode_UTF16(String str, String errors,
                                      int byteorder) {
        int size = str.length();
        StringBuffer v = new StringBuffer((size +
                                       (byteorder == 0 ? 1 : 0)) * 2);

        if (byteorder == 0) {
            v.append((char) 0xFE);
            v.append((char) 0xFF);
        }

        if (byteorder == 0 || byteorder == 1)
            for (int i = 0; i < size; i++) {
                char ch = str.charAt(i);
                v.append((char) ((ch >>> 8) & 0xFF));
                v.append((char) (ch & 0xFF));
            }
        else {
            for (int i = 0; i < size; i++) {
                char ch = str.charAt(i);
                v.append((char) (ch & 0xFF));
                v.append((char) ((ch >>> 8) & 0xFF));
            }
        }

        return v.toString();
    }




    public static PyTuple utf_16_decode(String str) {
        return utf_16_decode(str, null);
    }

    public static PyTuple utf_16_decode(String str, String errors) {
        int[] bo = new int[] { 0 };
        return decode_tuple(decode_UTF16(str, errors, bo), str.length());
    }

    public static PyTuple utf_16_decode(String str, String errors,
                                        int byteorder) {
        int[] bo = new int[] { byteorder };
        return decode_tuple(decode_UTF16(str, errors, bo), str.length());
    }

    public static PyTuple utf_16_le_decode(String str) {
        return utf_16_le_decode(str, null);
    }

    public static PyTuple utf_16_le_decode(String str, String errors) {
        int[] bo = new int[] { -1 };
        return decode_tuple(decode_UTF16(str, errors, bo), str.length());
    }

    public static PyTuple utf_16_be_decode(String str) {
        return utf_16_be_decode(str, null);
    }

    public static PyTuple utf_16_be_decode(String str, String errors) {
        int[] bo = new int[] { 1 };
        return decode_tuple(decode_UTF16(str, errors, bo), str.length());
    }

    public static PyTuple utf_16_ex_decode(String str) {
        return utf_16_ex_decode(str, null);
    }

    public static PyTuple utf_16_ex_decode(String str, String errors) {
        return utf_16_ex_decode(str, errors, 0);
    }

    public static PyTuple utf_16_ex_decode(String str, String errors,
                                           int byteorder) {
        int[] bo = new int[] { 0 };
        String s = decode_UTF16(str, errors, bo);
        return new PyTuple(new PyObject[] {
             Py.newString(s),
             Py.newInteger(str.length()),
             Py.newInteger(bo[0])
        });
    }

    private static String decode_UTF16(String str, String errors,
                                       int[] byteorder) {
        int bo = 0;
        if (byteorder != null)
             bo = byteorder[0];

        int size = str.length();

        if (size % 2 != 0)
            codecs.decoding_error("UTF16", null, errors, "truncated data");

        StringBuffer v = new StringBuffer(size/2);

        for (int i = 0; i < size; i += 2) {
            char ch1 = str.charAt(i);
            char ch2 = str.charAt(i+1);
            if (ch1 == 0xFE && ch2 == 0xFF) {
                bo = 1;
                continue;
            } else if (ch1 == 0xFF && ch2 == 0xFE) {
                bo = -1;
                continue;
            }

            char ch;
            if (bo == -1)
                ch = (char) (ch2 << 8 | ch1);
            else
                ch = (char) (ch1 << 8 | ch2);

            if (ch < 0xD800 || ch > 0xDFFF) {
                v.append(ch);
                continue;
            }


            /* UTF-16 code pair: */
            if (i == size-1) {
                codecs.decoding_error("UTF-16", v, errors,
                                      "unexpected end of data");
                continue;
            }

            ch = str.charAt(++i);
            if (0xDC00 <= ch && ch <= 0xDFFF) {
                ch = str.charAt(++i);
                if (0xD800 <= ch && ch <= 0xDBFF)
                    /* This is valid data (a UTF-16 surrogate pair), but
                       we are not able to store this information since our
                       Py_UNICODE type only has 16 bits... this might
                       change someday, even though it's unlikely. */
                    codecs.decoding_error("UTF-16", v, errors,
                                          "code pairs are not supported");
                continue;
            }
            codecs.decoding_error("UTF-16", v, errors, "illegal encoding");
        }

        if (byteorder != null)
            byteorder[0] = bo;

        return v.toString();
    }



    /* --- RawUnicodeEscape Codec ----------------------------------------- */


    public static PyTuple raw_unicode_escape_encode(String str) {
        return raw_unicode_escape_encode(str, null);
    }

    public static PyTuple raw_unicode_escape_encode(String str,
                                                   String errors) {
        return encode_tuple(codecs.PyUnicode_EncodeRawUnicodeEscape(str,
                                                             errors, false),
                           str.length());
    }


    public static PyTuple raw_unicode_escape_decode(String str) {
        return raw_unicode_escape_decode(str, null);
    }

    public static PyTuple raw_unicode_escape_decode(String str,
                                                    String errors) {
        return decode_tuple(codecs.PyUnicode_DecodeRawUnicodeEscape(str,
                                                             errors),
                           str.length());
    }



    /* --- UnicodeEscape Codec -------------------------------------------- */


    public static PyTuple unicode_escape_encode(String str) {
        return unicode_escape_encode(str, null);
    }

    public static PyTuple unicode_escape_encode(String str, String errors) {
        return encode_tuple(PyString.encode_UnicodeEscape(str, false),
                           str.length());
    }

    public static PyTuple unicode_escape_decode(String str) {
        return unicode_escape_decode(str, null);
    }

    public static PyTuple unicode_escape_decode(String str, String errors) {
        int n = str.length();
        return decode_tuple(PyString.decode_UnicodeEscape(str,
                                                     0, n, errors, true), n);
    }



    /* --- UnicodeInternal Codec ------------------------------------------ */


    public static PyTuple unicode_internal_encode(String str) {
        return unicode_internal_encode(str, null);
    }

    public static PyTuple unicode_internal_encode(String str, String errors) {
        return encode_tuple(str, str.length());
    }

    public static PyTuple unicode_internal_decode(String str) {
        return unicode_internal_decode(str, null);
    }

    public static PyTuple unicode_internal_decode(String str, String errors) {
        return decode_tuple(str, str.length());
    }

}