Lib.test.test_unicode_jy.py Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of jython Show documentation
Show all versions of jython Show documentation
Jython is an implementation of the high-level, dynamic, object-oriented
language Python written in 100% Pure Java, and seamlessly integrated with
the Java platform. It thus allows you to run Python on any Java platform.
# -*- coding: utf-8 -*-
"""Misc unicode tests
Made for Jython.
"""
import os
import re
import sys
import unittest
from StringIO import StringIO
from test import test_support
class UnicodeTestCase(unittest.TestCase):
def test_simplejson_plane_bug(self):
# a bug exposed by simplejson: unicode __add__ was always
# forcing the basic plane
chunker = re.compile(r'(.*?)(["\\\x00-\x1f])', re.VERBOSE | re.MULTILINE | re.DOTALL)
orig = u'z\U0001d120x'
quoted1 = u'"z\U0001d120x"'
quoted2 = '"' + orig + '"'
# chunker re gives different results depending on the plane
self.assertEqual(chunker.match(quoted1, 1).groups(), (orig, u'"'))
self.assertEqual(chunker.match(quoted2, 1).groups(), (orig, u'"'))
def test_parse_unicode(self):
foo = u'ą\n'
self.assertEqual(len(foo), 2, repr(foo))
self.assertEqual(repr(foo), "u'\\u0105\\n'")
self.assertEqual(ord(foo[0]), 261)
self.assertEqual(ord(foo[1]), 10)
bar = foo.encode('utf-8')
self.assertEqual(len(bar), 3)
self.assertEqual(repr(bar), "'\\xc4\\x85\\n'")
self.assertEqual(ord(bar[0]), 196)
self.assertEqual(ord(bar[1]), 133)
self.assertEqual(ord(bar[2]), 10)
def test_parse_raw_unicode(self):
foo = ur'ą\n'
self.assertEqual(len(foo), 3, repr(foo))
self.assertEqual(repr(foo), "u'\\u0105\\\\n'")
self.assertEqual(ord(foo[0]), 261)
self.assertEqual(ord(foo[1]), 92)
self.assertEqual(ord(foo[2]), 110)
bar = foo.encode('utf-8')
self.assertEqual(len(bar), 4)
self.assertEqual(repr(bar), "'\\xc4\\x85\\\\n'")
self.assertEqual(ord(bar[0]), 196)
self.assertEqual(ord(bar[1]), 133)
self.assertEqual(ord(bar[2]), 92)
self.assertEqual(ord(bar[3]), 110)
for baz in ur'Hello\u0020World !', ur'Hello\U00000020World !':
self.assertEqual(len(baz), 13, repr(baz))
self.assertEqual(repr(baz), "u'Hello World !'")
self.assertEqual(ord(baz[5]), 32)
quux = ur'\U00100000'
self.assertEqual(repr(quux), "u'\\U00100000'")
if sys.maxunicode == 0xffff:
self.assertEqual(len(quux), 2)
self.assertEqual(ord(quux[0]), 56256)
self.assertEqual(ord(quux[1]), 56320)
else:
self.assertEqual(len(quux), 1)
self.assertEqual(ord(quux), 1048576)
def test_raw_unicode_escape(self):
foo = u'\U00100000'
self.assertEqual(foo.encode('raw_unicode_escape'), '\\U00100000')
self.assertEqual(foo.encode('raw_unicode_escape').decode('raw_unicode_escape'),
foo)
for bar in '\\u', '\\u000', '\\U00000':
self.assertRaises(UnicodeDecodeError, bar.decode, 'raw_unicode_escape')
def test_encode_decimal(self):
self.assertEqual(int(u'\u0039\u0032'), 92)
self.assertEqual(int(u'\u0660'), 0)
self.assertEqual(int(u' \u001F\u0966\u096F\u0039'), 99)
self.assertEqual(long(u'\u0663'), 3)
self.assertEqual(float(u'\u0663.\u0661'), 3.1)
self.assertEqual(complex(u'\u0663.\u0661'), 3.1+0j)
def test_unstateful_end_of_data(self):
# http://bugs.jython.org/issue1368
for encoding in 'utf-8', 'utf-16', 'utf-16-be', 'utf-16-le':
self.assertRaises(UnicodeDecodeError, '\xe4'.decode, encoding)
def test_formatchar(self):
self.assertEqual('%c' % 255, '\xff')
self.assertRaises(OverflowError, '%c'.__mod__, 256)
result = u'%c' % 256
self.assert_(isinstance(result, unicode))
self.assertEqual(result, u'\u0100')
if sys.maxunicode == 0xffff:
self.assertEqual(u'%c' % sys.maxunicode, u'\uffff')
else:
self.assertEqual(u'%c' % sys.maxunicode, u'\U0010ffff')
self.assertRaises(OverflowError, '%c'.__mod__, sys.maxunicode + 1)
def test_repr(self):
self.assert_(isinstance('%r' % u'foo', str))
def test_concat(self):
self.assertRaises(UnicodeDecodeError, lambda : u'' + '毛泽东')
self.assertRaises(UnicodeDecodeError, lambda : '毛泽东' + u'')
def test_join(self):
self.assertRaises(UnicodeDecodeError, u''.join, ['foo', '毛泽东'])
self.assertRaises(UnicodeDecodeError, '毛泽东'.join, [u'foo', u'bar'])
def test_file_encoding(self):
'''Ensure file writing doesn't attempt to encode things by default and reading doesn't
decode things by default. This was jython's behavior prior to 2.2.1'''
EURO_SIGN = u"\u20ac"
try:
EURO_SIGN.encode()
except UnicodeEncodeError:
# This default encoding can't handle the encoding the Euro sign. Skip the test
return
f = open(test_support.TESTFN, "w")
self.assertRaises(UnicodeEncodeError, f, write, EURO_SIGN,
"Shouldn't be able to write out a Euro sign without first encoding")
f.close()
f = open(test_support.TESTFN, "w")
f.write(EURO_SIGN.encode('utf-8'))
f.close()
f = open(test_support.TESTFN, "r")
encoded_euro = f.read()
f.close()
os.remove(test_support.TESTFN)
self.assertEquals('\xe2\x82\xac', encoded_euro)
self.assertEquals(EURO_SIGN, encoded_euro.decode('utf-8'))
def test_translate(self):
# http://bugs.jython.org/issue1483
self.assertEqual(
u'\u0443\u043a\u0430\u0437\u0430\u0442\u044c'.translate({}),
u'\u0443\u043a\u0430\u0437\u0430\u0442\u044c')
self.assertEqual(u'\u0443oo'.translate({0x443: 102}), u'foo')
self.assertEqual(
unichr(sys.maxunicode).translate({sys.maxunicode: 102}),
u'f')
class UnicodeFormatTestCase(unittest.TestCase):
def test_unicode_mapping(self):
assertTrue = self.assertTrue
class EnsureUnicode(dict):
def __missing__(self, key):
assertTrue(isinstance(key, unicode))
return key
u'%(foo)s' % EnsureUnicode()
def test_non_ascii_unicode_mod_str(self):
# Regression test for a problem on the formatting logic: when no unicode
# args were found, Jython stored the resulting buffer on a PyString,
# decoding it later to make a PyUnicode. That crashed when the left side
# of % was a unicode containing non-ascii chars
self.assertEquals(u"\u00e7%s" % "foo", u"\u00e7foo")
class UnicodeStdIOTestCase(unittest.TestCase):
def setUp(self):
self.stdout = sys.stdout
def tearDown(self):
sys.stdout = self.stdout
def test_intercepted_stdout(self):
msg = u'Circle is 360\u00B0'
sys.stdout = StringIO()
print msg,
self.assertEqual(sys.stdout.getvalue(), msg)
def test_main():
    """Run every test case class of this module through test_support."""
    test_cases = (
        UnicodeTestCase,
        UnicodeFormatTestCase,
        UnicodeStdIOTestCase,
    )
    test_support.run_unittest(*test_cases)


# Run the tests when this file is executed as a script.
if __name__ == "__main__":
    test_main()