Lib.test.test_unicode_jy.py Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of jython-standalone Show documentation
Show all versions of jython-standalone Show documentation
Jython is an implementation of the high-level, dynamic, object-oriented
language Python written in 100% Pure Java, and seamlessly integrated with
the Java platform. It thus allows you to run Python on any Java platform.
# -*- coding: utf-8 -*-
"""Misc unicode tests
Made for Jython. (But it will run for CPython.)
"""
import itertools
import random
import re
import string
import sys
import unittest
from StringIO import StringIO
from test import test_support
class UnicodeTestCase(unittest.TestCase):
def test_simplejson_plane_bug(self):
# a bug exposed by simplejson: unicode __add__ was always
# forcing the basic plane
chunker = re.compile(r'(.*?)(["\\\x00-\x1f])', re.VERBOSE | re.MULTILINE | re.DOTALL)
orig = u'z\U0001d120x'
quoted1 = u'"z\U0001d120x"'
quoted2 = '"' + orig + '"'
# chunker re gives different results depending on the plane
self.assertEqual(chunker.match(quoted1, 1).groups(), (orig, u'"'))
self.assertEqual(chunker.match(quoted2, 1).groups(), (orig, u'"'))
def test_parse_unicode(self):
foo = u'ą\n'
self.assertEqual(len(foo), 2, repr(foo))
self.assertEqual(repr(foo), "u'\\u0105\\n'")
self.assertEqual(ord(foo[0]), 261)
self.assertEqual(ord(foo[1]), 10)
bar = foo.encode('utf-8')
self.assertEqual(len(bar), 3)
self.assertEqual(repr(bar), "'\\xc4\\x85\\n'")
self.assertEqual(ord(bar[0]), 196)
self.assertEqual(ord(bar[1]), 133)
self.assertEqual(ord(bar[2]), 10)
def test_parse_raw_unicode(self):
foo = ur'ą\n'
self.assertEqual(len(foo), 3, repr(foo))
self.assertEqual(repr(foo), "u'\\u0105\\\\n'")
self.assertEqual(ord(foo[0]), 261)
self.assertEqual(ord(foo[1]), 92)
self.assertEqual(ord(foo[2]), 110)
bar = foo.encode('utf-8')
self.assertEqual(len(bar), 4)
self.assertEqual(repr(bar), "'\\xc4\\x85\\\\n'")
self.assertEqual(ord(bar[0]), 196)
self.assertEqual(ord(bar[1]), 133)
self.assertEqual(ord(bar[2]), 92)
self.assertEqual(ord(bar[3]), 110)
for baz in ur'Hello\u0020World !', ur'Hello\U00000020World !':
self.assertEqual(len(baz), 13, repr(baz))
self.assertEqual(repr(baz), "u'Hello World !'")
self.assertEqual(ord(baz[5]), 32)
quux = ur'\U00100000'
self.assertEqual(repr(quux), "u'\\U00100000'")
if sys.maxunicode == 0xffff:
self.assertEqual(len(quux), 2)
self.assertEqual(ord(quux[0]), 56256)
self.assertEqual(ord(quux[1]), 56320)
else:
self.assertEqual(len(quux), 1)
self.assertEqual(ord(quux), 1048576)
def test_raw_unicode_escape(self):
foo = u'\U00100000'
self.assertEqual(foo.encode('raw_unicode_escape'), '\\U00100000')
self.assertEqual(foo.encode('raw_unicode_escape').decode('raw_unicode_escape'),
foo)
for bar in '\\u', '\\u000', '\\U00000':
self.assertRaises(UnicodeDecodeError, bar.decode, 'raw_unicode_escape')
def test_encode_decimal(self):
self.assertEqual(int(u'\u0039\u0032'), 92)
self.assertEqual(int(u'\u0660'), 0)
self.assertEqual(int(u' \u001F\u0966\u096F\u0039'), 99)
self.assertEqual(long(u'\u0663'), 3)
self.assertEqual(float(u'\u0663.\u0661'), 3.1)
self.assertEqual(complex(u'\u0663.\u0661'), 3.1+0j)
def test_unstateful_end_of_data(self):
# http://bugs.jython.org/issue1368
for encoding in 'utf-8', 'utf-16', 'utf-16-be', 'utf-16-le':
self.assertRaises(UnicodeDecodeError, '\xe4'.decode, encoding)
def test_formatchar(self):
self.assertEqual('%c' % 255, '\xff')
self.assertRaises(OverflowError, '%c'.__mod__, 256)
result = u'%c' % 256
self.assert_(isinstance(result, unicode))
self.assertEqual(result, u'\u0100')
if sys.maxunicode == 0xffff:
self.assertEqual(u'%c' % sys.maxunicode, u'\uffff')
else:
self.assertEqual(u'%c' % sys.maxunicode, u'\U0010ffff')
self.assertRaises(OverflowError, '%c'.__mod__, sys.maxunicode + 1)
def test_repr(self):
self.assert_(isinstance('%r' % u'foo', str))
@unittest.skipUnless(test_support.is_jython, "Specific to Jython")
def test_unicode_lone_surrogate(self):
# http://bugs.jython.org/issue2190
self.assertRaises(ValueError, unichr, 0xd800)
self.assertRaises(ValueError, unichr, 0xdfff)
def test_concat(self):
self.assertRaises(UnicodeDecodeError, lambda : u'' + '毛泽东')
self.assertRaises(UnicodeDecodeError, lambda : '毛泽东' + u'')
def test_join(self):
self.assertRaises(UnicodeDecodeError, u''.join, ['foo', '毛泽东'])
self.assertRaises(UnicodeDecodeError, '毛泽东'.join, [u'foo', u'bar'])
def test_file_encoding(self):
'''Ensure file writing doesn't attempt to encode things by default and reading doesn't
decode things by default. This was jython's behavior prior to 2.2.1'''
EURO_SIGN = u"\u20ac"
try:
EURO_SIGN.encode()
except UnicodeEncodeError:
# This default encoding can't handle the encoding the Euro sign. Skip the test
return
f = open(test_support.TESTFN, "w")
self.assertRaises(UnicodeEncodeError, f, write, EURO_SIGN,
"Shouldn't be able to write out a Euro sign without first encoding")
f.close()
f = open(test_support.TESTFN, "w")
f.write(EURO_SIGN.encode('utf-8'))
f.close()
f = open(test_support.TESTFN, "r")
encoded_euro = f.read()
f.close()
os.remove(test_support.TESTFN)
self.assertEquals('\xe2\x82\xac', encoded_euro)
self.assertEquals(EURO_SIGN, encoded_euro.decode('utf-8'))
def test_translate(self):
# http://bugs.jython.org/issue1483
self.assertEqual(
u'\u0443\u043a\u0430\u0437\u0430\u0442\u044c'.translate({}),
u'\u0443\u043a\u0430\u0437\u0430\u0442\u044c')
self.assertEqual(u'\u0443oo'.translate({0x443: 102}), u'foo')
self.assertEqual(
unichr(sys.maxunicode).translate({sys.maxunicode: 102}),
u'f')
class UnicodeMaterial(object):
''' Object holding a list of single characters and a unicode string
that is their concatenation. The sequence is created from a
background sequence of basic plane characters and random
replacement with supplementary plane characters (those with
point code>0xffff).
'''
base = tuple(u'abcdefghijklmnopqrstuvwxyz')
if sys.maxunicode < 0x10000:
# This is here to prevent error messages on a narrow CPython build.
supp = (u'NOT SUPPORTED',)
else:
supp = tuple(map(unichr, range(0x10000, 0x1000c)))
used = sorted(set(base+supp))
def __init__(self, size=20, pred=None, ran=None):
''' Create size chars choosing an SP char at i where
pred(ran, i)==True where ran is an instance of
random.Random supplied in the constructor or created
locally (if ran==None).
'''
# Generators for the BMP and SP characters
base = itertools.cycle(UnicodeMaterial.base)
supp = itertools.cycle(UnicodeMaterial.supp)
# Each instance gets a random generator
if ran is None:
ran = random.Random()
self.random = ran
if pred is None:
pred = lambda ran, j : ran.random() < DEFAULT_RATE
# Generate the list
r = list()
for i in range(size):
if pred(self.random, i):
c = supp.next()
else:
c = base.next()
r.append(c)
# The list and its concatenation are our material
self.ref = r
self.size = len(r)
self.text = u''.join(r)
self.target = u''
def __len__(self):
return self.size
def insert(self, target, p=None):
''' Insert target string at position p (or middle), truncating if
that would make the material any longer
'''
if p is None:
p = max(0, (self.size-len(target)) // 2)
n = 0
for t in target:
if p+n >= self.size:
break;
self.ref[p+n] = t
n += 1
self.target = target[:n]
self.text = u''.join(self.ref)
@unittest.skipUnless(test_support.is_jython, "Specific to Jython")
class UnicodeIndexMixTest(unittest.TestCase):
# Test indexing where there may be more than one code unit per code point.
# See Jython Issue #2100.
# Functions defining particular distributions of SP codes
#
def evenly(self, rate=0.2):
'Evenly distributed at given rate'
def f(ran, i):
return ran.random() < rate
return f
def evenly_before(self, k, rate=0.2):
'Evenly distributed on i=k at given rate'
def f(ran, i):
return i >= k and ran.random() < rate
return f
def at(self, places):
'Only at specified places'
def f(ran, i):
return i in places
return f
def setUp(self):
ran = random.Random(1234) # ensure repeatable
mat = list()
mat.append(UnicodeMaterial(10, self.at([2]), ran))
mat.append(UnicodeMaterial(10, self.at([2, 5]), ran))
mat.append(UnicodeMaterial(50, self.evenly(), ran))
mat.append(UnicodeMaterial(200, self.evenly_before(70), ran))
mat.append(UnicodeMaterial(200, self.evenly_from(130), ran))
mat.append(UnicodeMaterial(1000, self.evenly(), ran))
self.material = mat
def test_getitem(self):
# Test map from to code point index to internal representation
# Fails in Jython 2.7b3
def check_getitem(m):
# Check indexing the string returns the expected point code
for i in xrange(m.size):
self.assertEqual(m.text[i], m.ref[i])
for m in self.material:
check_getitem(m)
def test_slice(self):
# Test indexing gets the slice ends correct.
# Passes in Jython 2.7b3, but may be touched by #2100 changes.
def check_slice(m):
# Check a range of slices against slices of the reference.
n = 1
while n <= m.size:
for i in range(m.size - n):
exp = u''.join(m.ref[i:i+n])
self.assertEqual(m.text[i:i+n], exp)
n *= 3
for m in self.material:
check_slice(m)
def test_find(self):
# Test map from internal find result to code point index
# Fails in Jython 2.7b3
def check_find(ref):
# Check find returns indexes for single point codes
for c in set(m.used):
start = 0
u = m.text
while start < m.size:
i = u.find(c, start)
if i < 0: break
self.assertEqual(u[i], c)
self.assertGreaterEqual(i, start)
start = i + 1
def check_find_str(m, t):
# Check find returns correct index for string target
i = m.text.find(t)
self.assertEqual(list(t), m.ref[i:i+len(t)])
targets = [
u"this",
u"ab\U00010041de",
u"\U00010041\U00010042\U00010042xx",
u"xx\U00010041\U00010042\U00010043yy",
]
for m in self.material:
check_find(m)
for t in targets:
# Insert in middle then try to find it
m.insert(t)
check_find_str(m, t)
def test_rfind(self):
# Test map from internal rfind result to code point index
# Fails in Jython 2.7b3
def check_rfind(ref):
# Check rfind returns indexes for single point codes
for c in set(m.used):
end = m.size
u = m.text
while True:
i = u.rfind(c, 0, end)
if i < 0: break
self.assertLess(i, end)
self.assertEqual(u[i], c)
end = i
def check_rfind_str(m, t):
# Check rfind returns correct index for string target
i = m.text.rfind(t)
self.assertEqual(list(t), m.ref[i:i+len(t)])
targets = [
u"this",
u"ab\U00010041de",
u"\U00010041\U00010042\U00010042xx",
u"xx\U00010041\U00010042\U00010043yy",
]
for m in self.material:
check_rfind(m)
for t in targets:
# Insert in middle then try to find it
m.insert(t)
check_rfind_str(m, t)
def test_surrogate_validation(self):
from java.lang import StringBuilder
def insert_sb(text, c1, c2):
# Insert code points c1, c2 in the text, as a Java StringBuilder
sb = StringBuilder()
# c1 at the quarter point
p1 = len(mat) // 4
for c in mat.text[:p1]:
sb.appendCodePoint(ord(c))
sb.appendCodePoint(c1)
# c2 at the three-quarter point
p2 = 3 * p1
for c in mat.text[p1:p2]:
sb.appendCodePoint(ord(c))
sb.appendCodePoint(c2)
# Rest of text
for c in mat.text[p2:]:
sb.appendCodePoint(ord(c))
return sb
# Test that lone surrogates are rejected
for surr in [0xdc81, 0xdc00, 0xdfff, 0xd800, 0xdbff]:
for mat in self.material:
# Java StringBuilder with two private-use characters:
sb = insert_sb(mat.text, 0xe000, 0xf000)
# Check this is acceptable
#print repr(unicode(sb))
self.assertEqual(len(unicode(sb)), len(mat)+2)
# Java StringBuilder with private-use and lone surrogate:
sb = insert_sb(mat.text, 0xe000, surr)
# Check this is detected
#print repr(unicode(sb))
self.assertRaises(ValueError, unicode, sb)
class UnicodeFormatTestCase(unittest.TestCase):
def test_unicode_mapping(self):
assertTrue = self.assertTrue
class EnsureUnicode(dict):
def __missing__(self, key):
assertTrue(isinstance(key, unicode))
return key
u'%(foo)s' % EnsureUnicode()
def test_non_ascii_unicode_mod_str(self):
# Regression test for a problem on the formatting logic: when no unicode
# args were found, Jython stored the resulting buffer on a PyString,
# decoding it later to make a PyUnicode. That crashed when the left side
# of % was a unicode containing non-ascii chars
self.assertEquals(u"\u00e7%s" % "foo", u"\u00e7foo")
class UnicodeStdIOTestCase(unittest.TestCase):
def setUp(self):
self.stdout = sys.stdout
def tearDown(self):
sys.stdout = self.stdout
def test_intercepted_stdout(self):
msg = u'Circle is 360\u00B0'
sys.stdout = StringIO()
print msg,
self.assertEqual(sys.stdout.getvalue(), msg)
class UnicodeFormatStrTest(unittest.TestCase):
# Adapted from test_str StrTest by liberally adding u-prefixes.
def test__format__(self):
def test(value, format, expected):
r = value.__format__(format)
self.assertEqual(r, expected)
# note 'xyz'==u'xyz', so must check type separately
self.assertIsInstance(r, unicode)
# also test both with the trailing 's'
r = value.__format__(format + u's')
self.assertEqual(r, expected)
self.assertIsInstance(r, unicode)
test(u'', '', '')
test(u'abc', '', 'abc')
test(u'abc', '.3', 'abc')
test(u'ab', '.3', 'ab')
test(u'abcdef', '.3', 'abc')
test(u'abcdef', '.0', '')
test(u'abc', '3.3', 'abc')
test(u'abc', '2.3', 'abc')
test(u'abc', '2.2', 'ab')
test(u'abc', '3.2', 'ab ')
test(u'result', 'x<0', 'result')
test(u'result', 'x<5', 'result')
test(u'result', 'x<6', 'result')
test(u'result', 'x<7', 'resultx')
test(u'result', 'x<8', 'resultxx')
test(u'result', ' <7', 'result ')
test(u'result', '<7', 'result ')
test(u'result', '>7', ' result')
test(u'result', '>8', ' result')
test(u'result', '^8', ' result ')
test(u'result', '^9', ' result ')
test(u'result', '^10', ' result ')
test(u'a', '10000', 'a' + ' ' * 9999)
test(u'', '10000', ' ' * 10000)
test(u'', '10000000', ' ' * 10000000)
def test_format(self):
self.assertEqual(u''.format(), '')
self.assertEqual(u'a'.format(), 'a')
self.assertEqual(u'ab'.format(), 'ab')
self.assertEqual(u'a{{'.format(), 'a{')
self.assertEqual(u'a}}'.format(), 'a}')
self.assertEqual(u'{{b'.format(), '{b')
self.assertEqual(u'}}b'.format(), '}b')
self.assertEqual(u'a{{b'.format(), 'a{b')
# examples from the PEP:
import datetime
self.assertEqual(u"My name is {0}".format('Fred'), "My name is Fred")
self.assertIsInstance(u"My name is {0}".format('Fred'), unicode)
self.assertEqual(u"My name is {0[name]}".format(dict(name='Fred')),
"My name is Fred")
self.assertEqual(u"My name is {0} :-{{}}".format('Fred'),
"My name is Fred :-{}")
d = datetime.date(2007, 8, 18)
self.assertEqual(u"The year is {0.year}".format(d),
"The year is 2007")
# classes we'll use for testing
class C:
def __init__(self, x=100):
self._x = x
def __format__(self, spec):
return spec
class D:
def __init__(self, x):
self.x = x
def __format__(self, spec):
return str(self.x)
# class with __str__, but no __format__
class E:
def __init__(self, x):
self.x = x
def __str__(self):
return 'E(' + self.x + ')'
# class with __repr__, but no __format__ or __str__
class F:
def __init__(self, x):
self.x = x
def __repr__(self):
return 'F(' + self.x + ')'
# class with __format__ that forwards to string, for some format_spec's
class G:
def __init__(self, x):
self.x = x
def __str__(self):
return "string is " + self.x
def __format__(self, format_spec):
if format_spec == 'd':
return 'G(' + self.x + ')'
return object.__format__(self, format_spec)
# class that returns a bad type from __format__
class H:
def __format__(self, format_spec):
return 1.0
class I(datetime.date):
def __format__(self, format_spec):
return self.strftime(format_spec)
class J(int):
def __format__(self, format_spec):
return int.__format__(self * 2, format_spec)
self.assertEqual(u''.format(), '')
self.assertEqual(u'abc'.format(), 'abc')
self.assertEqual(u'{0}'.format('abc'), 'abc')
self.assertEqual(u'{0:}'.format('abc'), 'abc')
self.assertEqual(u'X{0}'.format('abc'), 'Xabc')
self.assertEqual(u'{0}X'.format('abc'), 'abcX')
self.assertEqual(u'X{0}Y'.format('abc'), 'XabcY')
self.assertEqual(u'{1}'.format(1, 'abc'), 'abc')
self.assertEqual(u'X{1}'.format(1, 'abc'), 'Xabc')
self.assertEqual(u'{1}X'.format(1, 'abc'), 'abcX')
self.assertEqual(u'X{1}Y'.format(1, 'abc'), 'XabcY')
self.assertEqual(u'{0}'.format(-15), '-15')
self.assertEqual(u'{0}{1}'.format(-15, 'abc'), '-15abc')
self.assertEqual(u'{0}X{1}'.format(-15, 'abc'), '-15Xabc')
self.assertEqual(u'{{'.format(), '{')
self.assertEqual(u'}}'.format(), '}')
self.assertEqual(u'{{}}'.format(), '{}')
self.assertEqual(u'{{x}}'.format(), '{x}')
self.assertEqual(u'{{{0}}}'.format(123), '{123}')
self.assertEqual(u'{{{{0}}}}'.format(), '{{0}}')
self.assertEqual(u'}}{{'.format(), '}{')
self.assertEqual(u'}}x{{'.format(), '}x{')
# weird field names
self.assertEqual(u"{0[foo-bar]}".format({'foo-bar':'baz'}), 'baz')
self.assertEqual(u"{0[foo bar]}".format({'foo bar':'baz'}), 'baz')
self.assertEqual(u"{0[ ]}".format({' ':3}), '3')
self.assertEqual(u'{foo._x}'.format(foo=C(20)), '20')
self.assertEqual(u'{1}{0}'.format(D(10), D(20)), '2010')
self.assertEqual(u'{0._x.x}'.format(C(D('abc'))), 'abc')
self.assertEqual(u'{0[0]}'.format(['abc', 'def']), 'abc')
self.assertEqual(u'{0[1]}'.format(['abc', 'def']), 'def')
self.assertEqual(u'{0[1][0]}'.format(['abc', ['def']]), 'def')
self.assertEqual(u'{0[1][0].x}'.format(['abc', [D('def')]]), 'def')
self.assertIsInstance(u'{0[1][0].x}'.format(['abc', [D('def')]]), unicode)
# strings
self.assertEqual(u'{0:.3s}'.format('abc'), 'abc')
self.assertEqual(u'{0:.3s}'.format('ab'), 'ab')
self.assertEqual(u'{0:.3s}'.format('abcdef'), 'abc')
self.assertEqual(u'{0:.0s}'.format('abcdef'), '')
self.assertEqual(u'{0:3.3s}'.format('abc'), 'abc')
self.assertEqual(u'{0:2.3s}'.format('abc'), 'abc')
self.assertEqual(u'{0:2.2s}'.format('abc'), 'ab')
self.assertEqual(u'{0:3.2s}'.format('abc'), 'ab ')
self.assertEqual(u'{0:x<0s}'.format('result'), 'result')
self.assertEqual(u'{0:x<5s}'.format('result'), 'result')
self.assertEqual(u'{0:x<6s}'.format('result'), 'result')
self.assertEqual(u'{0:x<7s}'.format('result'), 'resultx')
self.assertEqual(u'{0:x<8s}'.format('result'), 'resultxx')
self.assertEqual(u'{0: <7s}'.format('result'), 'result ')
self.assertEqual(u'{0:<7s}'.format('result'), 'result ')
self.assertEqual(u'{0:>7s}'.format('result'), ' result')
self.assertEqual(u'{0:>8s}'.format('result'), ' result')
self.assertEqual(u'{0:^8s}'.format('result'), ' result ')
self.assertEqual(u'{0:^9s}'.format('result'), ' result ')
self.assertEqual(u'{0:^10s}'.format('result'), ' result ')
self.assertEqual(u'{0:10000}'.format('a'), 'a' + ' ' * 9999)
self.assertEqual(u'{0:10000}'.format(''), ' ' * 10000)
self.assertEqual(u'{0:10000000}'.format(''), ' ' * 10000000)
# format specifiers for user defined type
self.assertEqual(u'{0:abc}'.format(C()), 'abc')
# !r and !s coercions
self.assertEqual(u'{0!s}'.format('Hello'), 'Hello')
self.assertEqual(u'{0!s:}'.format('Hello'), 'Hello')
self.assertEqual(u'{0!s:15}'.format('Hello'), 'Hello ')
self.assertEqual(u'{0!s:15s}'.format('Hello'), 'Hello ')
self.assertEqual(u'{0!r}'.format('Hello'), "'Hello'")
self.assertEqual(u'{0!r:}'.format('Hello'), "'Hello'")
self.assertEqual(u'{0!r}'.format(F('Hello')), 'F(Hello)')
# test fallback to object.__format__
self.assertEqual(u'{0}'.format({}), '{}')
self.assertEqual(u'{0}'.format([]), '[]')
self.assertEqual(u'{0}'.format([1]), '[1]')
self.assertEqual(u'{0}'.format(E('data')), 'E(data)')
self.assertEqual(u'{0:d}'.format(G('data')), 'G(data)')
self.assertEqual(u'{0!s}'.format(G('data')), 'string is data')
msg = 'object.__format__ with a non-empty format string is deprecated'
with test_support.check_warnings((msg, PendingDeprecationWarning)):
self.assertEqual(u'{0:^10}'.format(E('data')), ' E(data) ')
self.assertEqual(u'{0:^10s}'.format(E('data')), ' E(data) ')
self.assertEqual(u'{0:>15s}'.format(G('data')), ' string is data')
#FIXME: not supported in Jython yet:
if not test_support.is_jython:
self.assertEqual(u"{0:date: %Y-%m-%d}".format(I(year=2007,
month=8,
day=27)),
"date: 2007-08-27")
# test deriving from a builtin type and overriding __format__
self.assertEqual(u"{0}".format(J(10)), "20")
# string format specifiers
self.assertEqual(u'{0:}'.format('a'), 'a')
# computed format specifiers
self.assertEqual(u"{0:.{1}}".format('hello world', 5), 'hello')
self.assertEqual(u"{0:.{1}s}".format('hello world', 5), 'hello')
self.assertEqual(u"{0:.{precision}s}".format('hello world', precision=5), 'hello')
self.assertEqual(u"{0:{width}.{precision}s}".format('hello world', width=10, precision=5), 'hello ')
self.assertEqual(u"{0:{width}.{precision}s}".format('hello world', width='10', precision='5'), 'hello ')
self.assertIsInstance(u"{0:{width}.{precision}s}".format('hello world', width='10', precision='5'), unicode)
# test various errors
self.assertRaises(ValueError, u'{'.format)
self.assertRaises(ValueError, u'}'.format)
self.assertRaises(ValueError, u'a{'.format)
self.assertRaises(ValueError, u'a}'.format)
self.assertRaises(ValueError, u'{a'.format)
self.assertRaises(ValueError, u'}a'.format)
self.assertRaises(IndexError, u'{0}'.format)
self.assertRaises(IndexError, u'{1}'.format, u'abc')
self.assertRaises(KeyError, u'{x}'.format)
self.assertRaises(ValueError, u"}{".format)
self.assertRaises(ValueError, u"{".format)
self.assertRaises(ValueError, u"}".format)
self.assertRaises(ValueError, u"abc{0:{}".format)
self.assertRaises(ValueError, u"{0".format)
self.assertRaises(IndexError, u"{0.}".format)
self.assertRaises(ValueError, u"{0.}".format, 0)
self.assertRaises(IndexError, u"{0[}".format)
self.assertRaises(ValueError, u"{0[}".format, [])
self.assertRaises(KeyError, u"{0]}".format)
self.assertRaises(ValueError, u"{0.[]}".format, 0)
self.assertRaises(ValueError, u"{0..foo}".format, 0)
self.assertRaises(ValueError, u"{0[0}".format, 0)
self.assertRaises(ValueError, u"{0[0:foo}".format, 0)
self.assertRaises(KeyError, u"{c]}".format)
self.assertRaises(ValueError, u"{{ {{{0}}".format, 0)
self.assertRaises(ValueError, u"{0}}".format, 0)
self.assertRaises(KeyError, u"{foo}".format, bar=3)
self.assertRaises(ValueError, u"{0!x}".format, 3)
self.assertRaises(ValueError, u"{0!}".format, 0)
self.assertRaises(ValueError, u"{0!rs}".format, 0)
self.assertRaises(ValueError, u"{!}".format)
self.assertRaises(IndexError, u"{:}".format)
self.assertRaises(IndexError, u"{:s}".format)
self.assertRaises(IndexError, u"{}".format)
# issue 6089
self.assertRaises(ValueError, u"{0[0]x}".format, [None])
self.assertRaises(ValueError, u"{0[0](10)}".format, [None])
# can't have a replacement on the field name portion
self.assertRaises(TypeError, u'{0[{1}]}'.format, 'abcdefg', 4)
# exceed maximum recursion depth
self.assertRaises(ValueError, u"{0:{1:{2}}}".format, 'abc', 's', '')
self.assertRaises(ValueError, u"{0:{1:{2:{3:{4:{5:{6}}}}}}}".format,
0, 1, 2, 3, 4, 5, 6, 7)
# string format spec errors
self.assertRaises(ValueError, u"{0:-s}".format, '')
self.assertRaises(ValueError, format, "", u"-")
self.assertRaises(ValueError, u"{0:=s}".format, '')
def test_format_auto_numbering(self):
class C:
def __init__(self, x=100):
self._x = x
def __format__(self, spec):
return spec
self.assertEqual(u'{}'.format(10), '10')
self.assertEqual(u'{:5}'.format('s'), 's ')
self.assertEqual(u'{!r}'.format('s'), "'s'")
self.assertEqual(u'{._x}'.format(C(10)), '10')
self.assertEqual(u'{[1]}'.format([1, 2]), '2')
self.assertEqual(u'{[a]}'.format({'a':4, 'b':2}), '4')
self.assertEqual(u'a{}b{}c'.format(0, 1), 'a0b1c')
self.assertEqual(u'a{:{}}b'.format('x', '^10'), 'a x b')
self.assertEqual(u'a{:{}x}b'.format(20, '#'), 'a0x14b')
# can't mix and match numbering and auto-numbering
self.assertRaises(ValueError, u'{}{1}'.format, 1, 2)
self.assertRaises(ValueError, u'{1}{}'.format, 1, 2)
self.assertRaises(ValueError, u'{:{1}}'.format, 1, 2)
self.assertRaises(ValueError, u'{0:{}}'.format, 1, 2)
# can mix and match auto-numbering and named
self.assertEqual(u'{f}{}'.format(4, f='test'), 'test4')
self.assertEqual(u'{}{f}'.format(4, f='test'), '4test')
self.assertEqual(u'{:{f}}{g}{}'.format(1, 3, g='g', f=2), ' 1g3')
self.assertEqual(u'{f:{}}{}{g}'.format(2, 4, f=1, g='g'), ' 14g')
class StringModuleUnicodeTest(unittest.TestCase):
# Taken from test_string ModuleTest and converted for unicode
def test_formatter(self):
def assertEqualAndUnicode(r, exp):
self.assertEqual(r, exp)
self.assertIsInstance(r, unicode)
fmt = string.Formatter()
assertEqualAndUnicode(fmt.format(u"foo"), "foo")
assertEqualAndUnicode(fmt.format(u"foo{0}", "bar"), "foobar")
assertEqualAndUnicode(fmt.format(u"foo{1}{0}-{1}", "bar", 6), "foo6bar-6")
assertEqualAndUnicode(fmt.format(u"-{arg!r}-", arg='test'), "-'test'-")
# override get_value ############################################
class NamespaceFormatter(string.Formatter):
def __init__(self, namespace={}):
string.Formatter.__init__(self)
self.namespace = namespace
def get_value(self, key, args, kwds):
if isinstance(key, (str, unicode)):
try:
# Check explicitly passed arguments first
return kwds[key]
except KeyError:
return self.namespace[key]
else:
string.Formatter.get_value(key, args, kwds)
fmt = NamespaceFormatter({'greeting':'hello'})
assertEqualAndUnicode(fmt.format(u"{greeting}, world!"), 'hello, world!')
# override format_field #########################################
class CallFormatter(string.Formatter):
def format_field(self, value, format_spec):
return format(value(), format_spec)
fmt = CallFormatter()
assertEqualAndUnicode(fmt.format(u'*{0}*', lambda : 'result'), '*result*')
# override convert_field ########################################
class XFormatter(string.Formatter):
def convert_field(self, value, conversion):
if conversion == 'x':
return None
return super(XFormatter, self).convert_field(value, conversion)
fmt = XFormatter()
assertEqualAndUnicode(fmt.format(u"{0!r}:{0!x}", 'foo', 'foo'), "'foo':None")
# override parse ################################################
class BarFormatter(string.Formatter):
# returns an iterable that contains tuples of the form:
# (literal_text, field_name, format_spec, conversion)
def parse(self, format_string):
for field in format_string.split('|'):
if field[0] == '+':
# it's markup
field_name, _, format_spec = field[1:].partition(':')
yield '', field_name, format_spec, None
else:
yield field, None, None, None
fmt = BarFormatter()
assertEqualAndUnicode(fmt.format(u'*|+0:^10s|*', 'foo'), '* foo *')
# test all parameters used
class CheckAllUsedFormatter(string.Formatter):
def check_unused_args(self, used_args, args, kwargs):
# Track which arguments actually got used
unused_args = set(kwargs.keys())
unused_args.update(range(0, len(args)))
for arg in used_args:
unused_args.remove(arg)
if unused_args:
raise ValueError("unused arguments")
fmt = CheckAllUsedFormatter()
# The next series should maybe also call assertEqualAndUnicode:
#assertEqualAndUnicode(fmt.format(u"{0}", 10), "10")
#assertEqualAndUnicode(fmt.format(u"{0}{i}", 10, i=100), "10100")
#assertEqualAndUnicode(fmt.format(u"{0}{i}{1}", 10, 20, i=100), "1010020")
# But string.Formatter.format returns bytes. See CPython Issue 15951.
self.assertEqual(fmt.format(u"{0}", 10), "10")
self.assertEqual(fmt.format(u"{0}{i}", 10, i=100), "10100")
self.assertEqual(fmt.format(u"{0}{i}{1}", 10, 20, i=100), "1010020")
self.assertRaises(ValueError, fmt.format, u"{0}{i}{1}", 10, 20, i=100, j=0)
self.assertRaises(ValueError, fmt.format, u"{0}", 10, 20)
self.assertRaises(ValueError, fmt.format, u"{0}", 10, 20, i=100)
self.assertRaises(ValueError, fmt.format, u"{i}", 10, 20, i=100)
class UnicodeSpaceTest(unittest.TestCase):
# Test classification of characters as whitespace (some Jython divergence)
def checkequal(self, expected, obj, methodname, *args):
"check that object.method() returns expected result"
realresult = getattr(obj, methodname)()
grumble = "%r.%s() returned %r" % (obj, methodname, realresult)
self.assertEqual(expected, realresult, grumble)
# print grumble, 'x' if realresult != expected else '.'
# The set of Unicode characters that are spaces according to CPython 2.7.8
SPACE = u'\t\n\x0b\x0c\r\x1c\x1d\x1e\x1f\x20\x85\xa0\u1680\u180e' + \
u'\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a' + \
u'\u2028\u2029\u202f\u205f\u3000'
if test_support.is_jython:
# Not whitespace in Jython based on java.lang.Character.isWhitespace.
# This test documents the divergence, until we decide to remove it.
for c in u'\x85\xa0\u2007\u202f':
SPACE = SPACE.replace(c, u'')
def test_isspace(self):
for c in self.SPACE:
self.checkequal(True, c, 'isspace')
self.checkequal(True, u'\t' + c + u' ', 'isspace')
# *strip() tests to supplement string_tests with non-ascii examples,
# using characters that are spaces in latin-1 but not in ascii.
def test_strip(self):
for c in self.SPACE:
# These should be stripped of c at left or right
sp = u" " + c + u" "
h = u"hello"
s = sp + h + sp
self.checkequal( h, s, 'strip')
self.checkequal( h, c + s + c, 'strip')
self.checkequal( sp + h, s, 'rstrip')
self.checkequal( sp + h, s + c, 'rstrip')
self.checkequal( h + sp, s, 'lstrip')
self.checkequal( h + sp, c + s, 'lstrip')
def test_split(self):
for c in self.SPACE:
# These should be split at c
s = u"AAA" + c + u"BBB"
self.assertEqual(2, len(s.split()), "no split made in " + repr(s))
self.assertEqual(2, len(s.rsplit()), "no rsplit made in " + repr(s))
def test_main():
test_support.run_unittest(
UnicodeTestCase,
UnicodeIndexMixTest,
UnicodeFormatTestCase,
UnicodeStdIOTestCase,
UnicodeFormatStrTest,
StringModuleUnicodeTest,
UnicodeSpaceTest,
)
if __name__ == "__main__":
test_main()