Lib.test.test_unicode_jy.py Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of jython-standalone Show documentation
Jython is an implementation of the high-level, dynamic, object-oriented language Python written in 100% Pure Java, and seamlessly integrated with the Java platform. It thus allows you to run Python on any Java platform.
There is a newer version: 2.7.4
Show newest version
# -*- coding: utf-8 -*-
"""Misc unicode tests

Made for Jython.
"""
import itertools
import random
import re
import string
import sys
import unittest
from StringIO import StringIO
from test import test_support
from java.lang import StringBuilder

class UnicodeTestCase(unittest.TestCase):

    def test_simplejson_plane_bug(self):
        # a bug exposed by simplejson: unicode __add__ was always
        # forcing the basic plane
        chunker = re.compile(r'(.*?)(["\\\x00-\x1f])', re.VERBOSE | re.MULTILINE | re.DOTALL)
        orig = u'z\U0001d120x'
        quoted1 = u'"z\U0001d120x"'
        quoted2 = '"' + orig + '"'
        # chunker re gives different results depending on the plane
        self.assertEqual(chunker.match(quoted1, 1).groups(), (orig, u'"'))
        self.assertEqual(chunker.match(quoted2, 1).groups(), (orig, u'"'))

    def test_parse_unicode(self):
        foo = u'ą\n'
        self.assertEqual(len(foo), 2, repr(foo))
        self.assertEqual(repr(foo), "u'\\u0105\\n'")
        self.assertEqual(ord(foo[0]), 261)
        self.assertEqual(ord(foo[1]), 10)

        bar = foo.encode('utf-8')
        self.assertEqual(len(bar), 3)
        self.assertEqual(repr(bar), "'\\xc4\\x85\\n'")
        self.assertEqual(ord(bar[0]), 196)
        self.assertEqual(ord(bar[1]), 133)
        self.assertEqual(ord(bar[2]), 10)

    def test_parse_raw_unicode(self):
        foo = ur'ą\n'
        self.assertEqual(len(foo), 3, repr(foo))
        self.assertEqual(repr(foo), "u'\\u0105\\\\n'")
        self.assertEqual(ord(foo[0]), 261)
        self.assertEqual(ord(foo[1]), 92)
        self.assertEqual(ord(foo[2]), 110)

        bar = foo.encode('utf-8')
        self.assertEqual(len(bar), 4)
        self.assertEqual(repr(bar), "'\\xc4\\x85\\\\n'")
        self.assertEqual(ord(bar[0]), 196)
        self.assertEqual(ord(bar[1]), 133)
        self.assertEqual(ord(bar[2]), 92)
        self.assertEqual(ord(bar[3]), 110)

        for baz in ur'Hello\u0020World !', ur'Hello\U00000020World !':
            self.assertEqual(len(baz), 13, repr(baz))
            self.assertEqual(repr(baz), "u'Hello World !'")
            self.assertEqual(ord(baz[5]), 32)

        quux = ur'\U00100000'
        self.assertEqual(repr(quux), "u'\\U00100000'")
        if sys.maxunicode == 0xffff:
            self.assertEqual(len(quux), 2)
            self.assertEqual(ord(quux[0]), 56256)
            self.assertEqual(ord(quux[1]), 56320)
        else:
            self.assertEqual(len(quux), 1)
            self.assertEqual(ord(quux), 1048576)

    def test_raw_unicode_escape(self):
        foo = u'\U00100000'
        self.assertEqual(foo.encode('raw_unicode_escape'), '\\U00100000')
        self.assertEqual(foo.encode('raw_unicode_escape').decode('raw_unicode_escape'),
                         foo)
        for bar in '\\u', '\\u000', '\\U00000':
            self.assertRaises(UnicodeDecodeError, bar.decode, 'raw_unicode_escape')

    def test_encode_decimal(self):
        self.assertEqual(int(u'\u0039\u0032'), 92)
        self.assertEqual(int(u'\u0660'), 0)
        self.assertEqual(int(u' \u001F\u0966\u096F\u0039'), 99)
        self.assertEqual(long(u'\u0663'), 3)
        self.assertEqual(float(u'\u0663.\u0661'), 3.1)
        self.assertEqual(complex(u'\u0663.\u0661'), 3.1+0j)

    def test_unstateful_end_of_data(self):
        # http://bugs.jython.org/issue1368
        for encoding in 'utf-8', 'utf-16', 'utf-16-be', 'utf-16-le':
            self.assertRaises(UnicodeDecodeError, '\xe4'.decode, encoding)

    def test_formatchar(self):
        self.assertEqual('%c' % 255, '\xff')
        self.assertRaises(OverflowError, '%c'.__mod__, 256)

        result = u'%c' % 256
        self.assert_(isinstance(result, unicode))
        self.assertEqual(result, u'\u0100')
        if sys.maxunicode == 0xffff:
            self.assertEqual(u'%c' % sys.maxunicode, u'\uffff')
        else:
            self.assertEqual(u'%c' % sys.maxunicode, u'\U0010ffff')
        self.assertRaises(OverflowError, '%c'.__mod__, sys.maxunicode + 1)

    def test_repr(self):
        self.assert_(isinstance('%r' % u'foo', str))

    def test_unicode_lone_surrogate(self):
        # http://bugs.jython.org/issue2190
        self.assertRaises(ValueError, unichr, 0xd800)
        self.assertRaises(ValueError, unichr, 0xdfff)

    def test_concat(self):
        self.assertRaises(UnicodeDecodeError, lambda : u'' + '毛泽东')
        self.assertRaises(UnicodeDecodeError, lambda : '毛泽东' + u'')

    def test_join(self):
        self.assertRaises(UnicodeDecodeError, u''.join, ['foo', '毛泽东'])
        self.assertRaises(UnicodeDecodeError, '毛泽东'.join, [u'foo', u'bar'])

    def test_file_encoding(self):
        '''Ensure file writing doesn't attempt to encode things by default and reading doesn't
        decode things by default.  This was jython's behavior prior to 2.2.1'''
        EURO_SIGN = u"\u20ac"
        try:
            EURO_SIGN.encode()
        except UnicodeEncodeError:
            # This default encoding can't handle the encoding the Euro sign.  Skip the test
            return

        f = open(test_support.TESTFN, "w")
        self.assertRaises(UnicodeEncodeError, f, write, EURO_SIGN,
                "Shouldn't be able to write out a Euro sign without first encoding")
        f.close()

        f = open(test_support.TESTFN, "w")
        f.write(EURO_SIGN.encode('utf-8'))
        f.close()

        f = open(test_support.TESTFN, "r")
        encoded_euro = f.read()
        f.close()
        os.remove(test_support.TESTFN)
        self.assertEquals('\xe2\x82\xac', encoded_euro)
        self.assertEquals(EURO_SIGN, encoded_euro.decode('utf-8'))

    def test_translate(self):
        # http://bugs.jython.org/issue1483
        self.assertEqual(
            u'\u0443\u043a\u0430\u0437\u0430\u0442\u044c'.translate({}),
            u'\u0443\u043a\u0430\u0437\u0430\u0442\u044c')
        self.assertEqual(u'\u0443oo'.translate({0x443: 102}), u'foo')
        self.assertEqual(
            unichr(sys.maxunicode).translate({sys.maxunicode: 102}),
            u'f')


class UnicodeMaterial(object):
    ''' Object holding a list of single characters and a unicode string
        that is their concatenation. The sequence is created from a
        background sequence of basic plane characters and random
        replacement with supplementary plane characters (those with
        point code>0xffff).
    '''

    base = tuple(u'abcdefghijklmnopqrstuvwxyz')
    supp = tuple(map(unichr, range(0x10000, 0x1000c)))
    used = sorted(set(base+supp))

    def __init__(self, size=20, pred=None, ran=None):
        ''' Create size chars choosing an SP char at i where
            pred(ran, i)==True where ran is an instance of
            random.Random supplied in the constructor or created
            locally (if ran==None).
        '''

        # Generators for the BMP and SP characters
        base = itertools.cycle(UnicodeMaterial.base)
        supp = itertools.cycle(UnicodeMaterial.supp)

        # Each instance gets a random generator
        if ran is None:
            ran = random.Random()
        self.random = ran

        if pred is None:
            pred = lambda ran, j : ran.random() < DEFAULT_RATE

        # Generate the list
        r = list()
        for i in range(size):
            if pred(self.random, i):
                c = supp.next()
            else:
                c = base.next()
            r.append(c)

        # The list and its concatenation are our material
        self.ref = r
        self.size = len(r)
        self.text = u''.join(r)
        self.target = u''

    def __len__(self):
        return self.size

    def insert(self, target, p=None):
        ''' Insert target string at position p (or middle), truncating if
            that would make the material any longer
        '''
        if p is None:
            p = max(0, (self.size-len(target)) // 2)

        n = 0
        for t in target:
            if p+n >= self.size:
                break;
            self.ref[p+n] = t
            n += 1

        self.target = target[:n]
        self.text = u''.join(self.ref)


class UnicodeIndexMixTest(unittest.TestCase):
    # Test indexing where there may be more than one code unit per code point.
    # See Jython Issue #2100.

    # Functions defining particular distributions of SP codes
    #
    def evenly(self, rate=0.2):
        'Evenly distributed at given rate'
        def f(ran, i):
            return ran.random() < rate
        return f

    def evenly_before(self, k, rate=0.2):
        'Evenly distributed on i=k at given rate'
        def f(ran, i):
            return i >= k and ran.random() < rate
        return f

    def at(self, places):
        'Only at specified places'
        def f(ran, i):
            return i in places
        return f

    def setUp(self):
        ran = random.Random(1234)  # ensure repeatable
        mat = list()
        mat.append(UnicodeMaterial(10, self.at([2]), ran))
        mat.append(UnicodeMaterial(10, self.at([2, 5]), ran))
        mat.append(UnicodeMaterial(50, self.evenly(), ran))
        mat.append(UnicodeMaterial(200, self.evenly_before(70), ran))
        mat.append(UnicodeMaterial(200, self.evenly_from(130), ran))
        mat.append(UnicodeMaterial(1000, self.evenly(), ran))

        self.material = mat


    def test_getitem(self):
        # Test map from to code point index to internal representation
        # Fails in Jython 2.7b3

        def check_getitem(m):
            # Check indexing the string returns the expected point code
            for i in xrange(m.size):
                self.assertEqual(m.text[i], m.ref[i])

        for m in self.material:
            check_getitem(m)

    def test_slice(self):
        # Test indexing gets the slice ends correct.
        # Passes in Jython 2.7b3, but may be touched by #2100 changes.

        def check_slice(m):
            # Check a range of slices against slices of the reference.
            n = 1
            while n <= m.size:
                for i in range(m.size - n):
                    exp = u''.join(m.ref[i:i+n])
                    self.assertEqual(m.text[i:i+n], exp)
                n *= 3

        for m in self.material:
            check_slice(m)

    def test_find(self):
        # Test map from internal find result to code point index
        # Fails in Jython 2.7b3

        def check_find(ref):
            # Check find returns indexes for single point codes
            for c in set(m.used):
                start = 0
                u = m.text
                while start < m.size:
                    i = u.find(c, start)
                    if i < 0: break
                    self.assertEqual(u[i], c)
                    self.assertGreaterEqual(i, start)
                    start = i + 1

        def check_find_str(m, t):
            # Check find returns correct index for string target
            i = m.text.find(t)
            self.assertEqual(list(t), m.ref[i:i+len(t)])

        targets = [
            u"this",
            u"ab\U00010041de", 
            u"\U00010041\U00010042\U00010042xx",
            u"xx\U00010041\U00010042\U00010043yy",
        ]

        for m in self.material:
            check_find(m)
            for t in targets:
                # Insert in middle then try to find it
                m.insert(t)
                check_find_str(m, t)

    def test_rfind(self):
        # Test map from internal rfind result to code point index
        # Fails in Jython 2.7b3

        def check_rfind(ref):
            # Check rfind returns indexes for single point codes
            for c in set(m.used):
                end = m.size
                u = m.text
                while True:
                    i = u.rfind(c, 0, end)
                    if i < 0: break
                    self.assertLess(i, end)
                    self.assertEqual(u[i], c)
                    end = i

        def check_rfind_str(m, t):
            # Check rfind returns correct index for string target
            i = m.text.rfind(t)
            self.assertEqual(list(t), m.ref[i:i+len(t)])

        targets = [
            u"this",
            u"ab\U00010041de", 
            u"\U00010041\U00010042\U00010042xx",
            u"xx\U00010041\U00010042\U00010043yy",
        ]

        for m in self.material:
            check_rfind(m)
            for t in targets:
                # Insert in middle then try to find it
                m.insert(t)
                check_rfind_str(m, t)

    def test_surrogate_validation(self):

        def insert_sb(text, c1, c2):
            # Insert code points c1, c2 in the text, as a Java StringBuilder
            sb = StringBuilder()
            # c1 at the quarter point
            p1 = len(mat) // 4
            for c in mat.text[:p1]:
                sb.appendCodePoint(ord(c))
            sb.appendCodePoint(c1)
            # c2 at the three-quarter point
            p2 = 3 * p1
            for c in mat.text[p1:p2]:
                sb.appendCodePoint(ord(c))
            sb.appendCodePoint(c2)
            # Rest of text
            for c in mat.text[p2:]:
                sb.appendCodePoint(ord(c))
            return sb

        # Test that lone surrogates are rejected
        for surr in [0xdc81, 0xdc00, 0xdfff, 0xd800, 0xdbff]:
            for mat in self.material:

                # Java StringBuilder with two private-use characters:
                sb = insert_sb(mat.text, 0xe000, 0xf000)
                # Check this is acceptable
                #print repr(unicode(sb))
                self.assertEqual(len(unicode(sb)), len(mat)+2)

                # Java StringBuilder with private-use and lone surrogate:
                sb = insert_sb(mat.text, 0xe000, surr)
                # Check this is detected
                #print repr(unicode(sb))
                self.assertRaises(ValueError, unicode, sb)


class UnicodeFormatTestCase(unittest.TestCase):

    def test_unicode_mapping(self):
        assertTrue = self.assertTrue
        class EnsureUnicode(dict):
            def __missing__(self, key):
                assertTrue(isinstance(key, unicode))
                return key
        u'%(foo)s' % EnsureUnicode()

    def test_non_ascii_unicode_mod_str(self):
        # Regression test for a problem on the formatting logic: when no unicode
        # args were found, Jython stored the resulting buffer on a PyString,
        # decoding it later to make a PyUnicode. That crashed when the left side
        # of % was a unicode containing non-ascii chars
        self.assertEquals(u"\u00e7%s" % "foo", u"\u00e7foo")


class UnicodeStdIOTestCase(unittest.TestCase):

    def setUp(self):
        self.stdout = sys.stdout

    def tearDown(self):
        sys.stdout = self.stdout

    def test_intercepted_stdout(self):
        msg = u'Circle is 360\u00B0'
        sys.stdout = StringIO()
        print msg,
        self.assertEqual(sys.stdout.getvalue(), msg)


class UnicodeFormatStrTest(unittest.TestCase):
    # Adapted from test_str StrTest by liberally adding u-prefixes.

    def test__format__(self):
        def test(value, format, expected):
            r = value.__format__(format)
            self.assertEqual(r, expected)
            # note 'xyz'==u'xyz', so must check type separately
            self.assertIsInstance(r, unicode)
            # also test both with the trailing 's'
            r = value.__format__(format + u's')
            self.assertEqual(r, expected)
            self.assertIsInstance(r, unicode)

        test(u'', '', '')
        test(u'abc', '', 'abc')
        test(u'abc', '.3', 'abc')
        test(u'ab', '.3', 'ab')
        test(u'abcdef', '.3', 'abc')
        test(u'abcdef', '.0', '')
        test(u'abc', '3.3', 'abc')
        test(u'abc', '2.3', 'abc')
        test(u'abc', '2.2', 'ab')
        test(u'abc', '3.2', 'ab ')
        test(u'result', 'x<0', 'result')
        test(u'result', 'x<5', 'result')
        test(u'result', 'x<6', 'result')
        test(u'result', 'x<7', 'resultx')
        test(u'result', 'x<8', 'resultxx')
        test(u'result', ' <7', 'result ')
        test(u'result', '<7', 'result ')
        test(u'result', '>7', ' result')
        test(u'result', '>8', '  result')
        test(u'result', '^8', ' result ')
        test(u'result', '^9', ' result  ')
        test(u'result', '^10', '  result  ')
        test(u'a', '10000', 'a' + ' ' * 9999)
        test(u'', '10000', ' ' * 10000)
        test(u'', '10000000', ' ' * 10000000)

    def test_format(self):
        self.assertEqual(u''.format(), '')
        self.assertEqual(u'a'.format(), 'a')
        self.assertEqual(u'ab'.format(), 'ab')
        self.assertEqual(u'a{{'.format(), 'a{')
        self.assertEqual(u'a}}'.format(), 'a}')
        self.assertEqual(u'{{b'.format(), '{b')
        self.assertEqual(u'}}b'.format(), '}b')
        self.assertEqual(u'a{{b'.format(), 'a{b')

        # examples from the PEP:
        import datetime
        self.assertEqual(u"My name is {0}".format('Fred'), "My name is Fred")
        self.assertIsInstance(u"My name is {0}".format('Fred'), unicode)
        self.assertEqual(u"My name is {0[name]}".format(dict(name='Fred')),
                         "My name is Fred")
        self.assertEqual(u"My name is {0} :-{{}}".format('Fred'),
                         "My name is Fred :-{}")

        d = datetime.date(2007, 8, 18)
        self.assertEqual(u"The year is {0.year}".format(d),
                         "The year is 2007")

        # classes we'll use for testing
        class C:
            def __init__(self, x=100):
                self._x = x
            def __format__(self, spec):
                return spec

        class D:
            def __init__(self, x):
                self.x = x
            def __format__(self, spec):
                return str(self.x)

        # class with __str__, but no __format__
        class E:
            def __init__(self, x):
                self.x = x
            def __str__(self):
                return 'E(' + self.x + ')'

        # class with __repr__, but no __format__ or __str__
        class F:
            def __init__(self, x):
                self.x = x
            def __repr__(self):
                return 'F(' + self.x + ')'

        # class with __format__ that forwards to string, for some format_spec's
        class G:
            def __init__(self, x):
                self.x = x
            def __str__(self):
                return "string is " + self.x
            def __format__(self, format_spec):
                if format_spec == 'd':
                    return 'G(' + self.x + ')'
                return object.__format__(self, format_spec)

        # class that returns a bad type from __format__
        class H:
            def __format__(self, format_spec):
                return 1.0

        class I(datetime.date):
            def __format__(self, format_spec):
                return self.strftime(format_spec)

        class J(int):
            def __format__(self, format_spec):
                return int.__format__(self * 2, format_spec)


        self.assertEqual(u''.format(), '')
        self.assertEqual(u'abc'.format(), 'abc')
        self.assertEqual(u'{0}'.format('abc'), 'abc')
        self.assertEqual(u'{0:}'.format('abc'), 'abc')
        self.assertEqual(u'X{0}'.format('abc'), 'Xabc')
        self.assertEqual(u'{0}X'.format('abc'), 'abcX')
        self.assertEqual(u'X{0}Y'.format('abc'), 'XabcY')
        self.assertEqual(u'{1}'.format(1, 'abc'), 'abc')
        self.assertEqual(u'X{1}'.format(1, 'abc'), 'Xabc')
        self.assertEqual(u'{1}X'.format(1, 'abc'), 'abcX')
        self.assertEqual(u'X{1}Y'.format(1, 'abc'), 'XabcY')
        self.assertEqual(u'{0}'.format(-15), '-15')
        self.assertEqual(u'{0}{1}'.format(-15, 'abc'), '-15abc')
        self.assertEqual(u'{0}X{1}'.format(-15, 'abc'), '-15Xabc')
        self.assertEqual(u'{{'.format(), '{')
        self.assertEqual(u'}}'.format(), '}')
        self.assertEqual(u'{{}}'.format(), '{}')
        self.assertEqual(u'{{x}}'.format(), '{x}')
        self.assertEqual(u'{{{0}}}'.format(123), '{123}')
        self.assertEqual(u'{{{{0}}}}'.format(), '{{0}}')
        self.assertEqual(u'}}{{'.format(), '}{')
        self.assertEqual(u'}}x{{'.format(), '}x{')

        # weird field names
        self.assertEqual(u"{0[foo-bar]}".format({'foo-bar':'baz'}), 'baz')
        self.assertEqual(u"{0[foo bar]}".format({'foo bar':'baz'}), 'baz')
        self.assertEqual(u"{0[ ]}".format({' ':3}), '3')

        self.assertEqual(u'{foo._x}'.format(foo=C(20)), '20')
        self.assertEqual(u'{1}{0}'.format(D(10), D(20)), '2010')
        self.assertEqual(u'{0._x.x}'.format(C(D('abc'))), 'abc')
        self.assertEqual(u'{0[0]}'.format(['abc', 'def']), 'abc')
        self.assertEqual(u'{0[1]}'.format(['abc', 'def']), 'def')
        self.assertEqual(u'{0[1][0]}'.format(['abc', ['def']]), 'def')
        self.assertEqual(u'{0[1][0].x}'.format(['abc', [D('def')]]), 'def')

        self.assertIsInstance(u'{0[1][0].x}'.format(['abc', [D('def')]]), unicode)

        # strings
        self.assertEqual(u'{0:.3s}'.format('abc'), 'abc')
        self.assertEqual(u'{0:.3s}'.format('ab'), 'ab')
        self.assertEqual(u'{0:.3s}'.format('abcdef'), 'abc')
        self.assertEqual(u'{0:.0s}'.format('abcdef'), '')
        self.assertEqual(u'{0:3.3s}'.format('abc'), 'abc')
        self.assertEqual(u'{0:2.3s}'.format('abc'), 'abc')
        self.assertEqual(u'{0:2.2s}'.format('abc'), 'ab')
        self.assertEqual(u'{0:3.2s}'.format('abc'), 'ab ')
        self.assertEqual(u'{0:x<0s}'.format('result'), 'result')
        self.assertEqual(u'{0:x<5s}'.format('result'), 'result')
        self.assertEqual(u'{0:x<6s}'.format('result'), 'result')
        self.assertEqual(u'{0:x<7s}'.format('result'), 'resultx')
        self.assertEqual(u'{0:x<8s}'.format('result'), 'resultxx')
        self.assertEqual(u'{0: <7s}'.format('result'), 'result ')
        self.assertEqual(u'{0:<7s}'.format('result'), 'result ')
        self.assertEqual(u'{0:>7s}'.format('result'), ' result')
        self.assertEqual(u'{0:>8s}'.format('result'), '  result')
        self.assertEqual(u'{0:^8s}'.format('result'), ' result ')
        self.assertEqual(u'{0:^9s}'.format('result'), ' result  ')
        self.assertEqual(u'{0:^10s}'.format('result'), '  result  ')
        self.assertEqual(u'{0:10000}'.format('a'), 'a' + ' ' * 9999)
        self.assertEqual(u'{0:10000}'.format(''), ' ' * 10000)
        self.assertEqual(u'{0:10000000}'.format(''), ' ' * 10000000)

        # format specifiers for user defined type
        self.assertEqual(u'{0:abc}'.format(C()), 'abc')

        # !r and !s coercions
        self.assertEqual(u'{0!s}'.format('Hello'), 'Hello')
        self.assertEqual(u'{0!s:}'.format('Hello'), 'Hello')
        self.assertEqual(u'{0!s:15}'.format('Hello'), 'Hello          ')
        self.assertEqual(u'{0!s:15s}'.format('Hello'), 'Hello          ')
        self.assertEqual(u'{0!r}'.format('Hello'), "'Hello'")
        self.assertEqual(u'{0!r:}'.format('Hello'), "'Hello'")
        self.assertEqual(u'{0!r}'.format(F('Hello')), 'F(Hello)')

        # test fallback to object.__format__
        self.assertEqual(u'{0}'.format({}), '{}')
        self.assertEqual(u'{0}'.format([]), '[]')
        self.assertEqual(u'{0}'.format([1]), '[1]')
        self.assertEqual(u'{0}'.format(E('data')), 'E(data)')
        self.assertEqual(u'{0:d}'.format(G('data')), 'G(data)')
        self.assertEqual(u'{0!s}'.format(G('data')), 'string is data')

        msg = 'object.__format__ with a non-empty format string is deprecated'
        with test_support.check_warnings((msg, PendingDeprecationWarning)):
            self.assertEqual(u'{0:^10}'.format(E('data')), ' E(data)  ')
            self.assertEqual(u'{0:^10s}'.format(E('data')), ' E(data)  ')
            self.assertEqual(u'{0:>15s}'.format(G('data')), ' string is data')

        #FIXME: not supported in Jython yet:
        if not test_support.is_jython:
            self.assertEqual(u"{0:date: %Y-%m-%d}".format(I(year=2007,
                                                           month=8,
                                                           day=27)),
                             "date: 2007-08-27")

            # test deriving from a builtin type and overriding __format__
            self.assertEqual(u"{0}".format(J(10)), "20")


        # string format specifiers
        self.assertEqual(u'{0:}'.format('a'), 'a')

        # computed format specifiers
        self.assertEqual(u"{0:.{1}}".format('hello world', 5), 'hello')
        self.assertEqual(u"{0:.{1}s}".format('hello world', 5), 'hello')
        self.assertEqual(u"{0:.{precision}s}".format('hello world', precision=5), 'hello')
        self.assertEqual(u"{0:{width}.{precision}s}".format('hello world', width=10, precision=5), 'hello     ')
        self.assertEqual(u"{0:{width}.{precision}s}".format('hello world', width='10', precision='5'), 'hello     ')

        self.assertIsInstance(u"{0:{width}.{precision}s}".format('hello world', width='10', precision='5'), unicode)

        # test various errors
        self.assertRaises(ValueError, u'{'.format)
        self.assertRaises(ValueError, u'}'.format)
        self.assertRaises(ValueError, u'a{'.format)
        self.assertRaises(ValueError, u'a}'.format)
        self.assertRaises(ValueError, u'{a'.format)
        self.assertRaises(ValueError, u'}a'.format)
        self.assertRaises(IndexError, u'{0}'.format)
        self.assertRaises(IndexError, u'{1}'.format, u'abc')
        self.assertRaises(KeyError,   u'{x}'.format)
        self.assertRaises(ValueError, u"}{".format)
        self.assertRaises(ValueError, u"{".format)
        self.assertRaises(ValueError, u"}".format)
        self.assertRaises(ValueError, u"abc{0:{}".format)
        self.assertRaises(ValueError, u"{0".format)
        self.assertRaises(IndexError, u"{0.}".format)
        self.assertRaises(ValueError, u"{0.}".format, 0)
        self.assertRaises(IndexError, u"{0[}".format)
        self.assertRaises(ValueError, u"{0[}".format, [])
        self.assertRaises(KeyError,   u"{0]}".format)
        self.assertRaises(ValueError, u"{0.[]}".format, 0)
        self.assertRaises(ValueError, u"{0..foo}".format, 0)
        self.assertRaises(ValueError, u"{0[0}".format, 0)
        self.assertRaises(ValueError, u"{0[0:foo}".format, 0)
        self.assertRaises(KeyError,   u"{c]}".format)
        self.assertRaises(ValueError, u"{{ {{{0}}".format, 0)
        self.assertRaises(ValueError, u"{0}}".format, 0)
        self.assertRaises(KeyError,   u"{foo}".format, bar=3)
        self.assertRaises(ValueError, u"{0!x}".format, 3)
        self.assertRaises(ValueError, u"{0!}".format, 0)
        self.assertRaises(ValueError, u"{0!rs}".format, 0)
        self.assertRaises(ValueError, u"{!}".format)
        self.assertRaises(IndexError, u"{:}".format)
        self.assertRaises(IndexError, u"{:s}".format)
        self.assertRaises(IndexError, u"{}".format)

        # issue 6089
        self.assertRaises(ValueError, u"{0[0]x}".format, [None])
        self.assertRaises(ValueError, u"{0[0](10)}".format, [None])

        # can't have a replacement on the field name portion
        self.assertRaises(TypeError, u'{0[{1}]}'.format, 'abcdefg', 4)

        # exceed maximum recursion depth
        self.assertRaises(ValueError, u"{0:{1:{2}}}".format, 'abc', 's', '')
        self.assertRaises(ValueError, u"{0:{1:{2:{3:{4:{5:{6}}}}}}}".format,
                          0, 1, 2, 3, 4, 5, 6, 7)

        # string format spec errors
        self.assertRaises(ValueError, u"{0:-s}".format, '')
        self.assertRaises(ValueError, format, "", u"-")
        self.assertRaises(ValueError, u"{0:=s}".format, '')

    def test_format_auto_numbering(self):
        class C:
            def __init__(self, x=100):
                self._x = x
            def __format__(self, spec):
                return spec

        self.assertEqual(u'{}'.format(10), '10')
        self.assertEqual(u'{:5}'.format('s'), 's    ')
        self.assertEqual(u'{!r}'.format('s'), "'s'")
        self.assertEqual(u'{._x}'.format(C(10)), '10')
        self.assertEqual(u'{[1]}'.format([1, 2]), '2')
        self.assertEqual(u'{[a]}'.format({'a':4, 'b':2}), '4')
        self.assertEqual(u'a{}b{}c'.format(0, 1), 'a0b1c')

        self.assertEqual(u'a{:{}}b'.format('x', '^10'), 'a    x     b')
        self.assertEqual(u'a{:{}x}b'.format(20, '#'), 'a0x14b')

        # can't mix and match numbering and auto-numbering
        self.assertRaises(ValueError, u'{}{1}'.format, 1, 2)
        self.assertRaises(ValueError, u'{1}{}'.format, 1, 2)
        self.assertRaises(ValueError, u'{:{1}}'.format, 1, 2)
        self.assertRaises(ValueError, u'{0:{}}'.format, 1, 2)

        # can mix and match auto-numbering and named
        self.assertEqual(u'{f}{}'.format(4, f='test'), 'test4')
        self.assertEqual(u'{}{f}'.format(4, f='test'), '4test')
        self.assertEqual(u'{:{f}}{g}{}'.format(1, 3, g='g', f=2), ' 1g3')
        self.assertEqual(u'{f:{}}{}{g}'.format(2, 4, f=1, g='g'), ' 14g')


class StringModuleUnicodeTest(unittest.TestCase):
    # Taken from test_string ModuleTest and converted for unicode

    def test_formatter(self):

        def assertEqualAndUnicode(r, exp):
            self.assertEqual(r, exp)
            self.assertIsInstance(r, unicode)

        fmt = string.Formatter()
        assertEqualAndUnicode(fmt.format(u"foo"), "foo")
        assertEqualAndUnicode(fmt.format(u"foo{0}", "bar"), "foobar")
        assertEqualAndUnicode(fmt.format(u"foo{1}{0}-{1}", "bar", 6), "foo6bar-6")
        assertEqualAndUnicode(fmt.format(u"-{arg!r}-", arg='test'), "-'test'-")

        # override get_value ############################################
        class NamespaceFormatter(string.Formatter):
            def __init__(self, namespace={}):
                string.Formatter.__init__(self)
                self.namespace = namespace

            def get_value(self, key, args, kwds):
                if isinstance(key, (str, unicode)):
                    try:
                        # Check explicitly passed arguments first
                        return kwds[key]
                    except KeyError:
                        return self.namespace[key]
                else:
                    string.Formatter.get_value(key, args, kwds)

        fmt = NamespaceFormatter({'greeting':'hello'})
        assertEqualAndUnicode(fmt.format(u"{greeting}, world!"), 'hello, world!')


        # override format_field #########################################
        class CallFormatter(string.Formatter):
            def format_field(self, value, format_spec):
                return format(value(), format_spec)

        fmt = CallFormatter()
        assertEqualAndUnicode(fmt.format(u'*{0}*', lambda : 'result'), '*result*')


        # override convert_field ########################################
        class XFormatter(string.Formatter):
            def convert_field(self, value, conversion):
                if conversion == 'x':
                    return None
                return super(XFormatter, self).convert_field(value, conversion)

        fmt = XFormatter()
        assertEqualAndUnicode(fmt.format(u"{0!r}:{0!x}", 'foo', 'foo'), "'foo':None")


        # override parse ################################################
        class BarFormatter(string.Formatter):
            # returns an iterable that contains tuples of the form:
            # (literal_text, field_name, format_spec, conversion)
            def parse(self, format_string):
                for field in format_string.split('|'):
                    if field[0] == '+':
                        # it's markup
                        field_name, _, format_spec = field[1:].partition(':')
                        yield '', field_name, format_spec, None
                    else:
                        yield field, None, None, None

        fmt = BarFormatter()
        assertEqualAndUnicode(fmt.format(u'*|+0:^10s|*', 'foo'), '*   foo    *')

        # test all parameters used
        class CheckAllUsedFormatter(string.Formatter):
            def check_unused_args(self, used_args, args, kwargs):
                # Track which arguments actually got used
                unused_args = set(kwargs.keys())
                unused_args.update(range(0, len(args)))

                for arg in used_args:
                    unused_args.remove(arg)

                if unused_args:
                    raise ValueError("unused arguments")

        fmt = CheckAllUsedFormatter()
        # The next series should maybe also call assertEqualAndUnicode:
        #assertEqualAndUnicode(fmt.format(u"{0}", 10), "10")
        #assertEqualAndUnicode(fmt.format(u"{0}{i}", 10, i=100), "10100")
        #assertEqualAndUnicode(fmt.format(u"{0}{i}{1}", 10, 20, i=100), "1010020")
        # But string.Formatter.format returns bytes. See CPython Issue 15951.
        self.assertEqual(fmt.format(u"{0}", 10), "10")
        self.assertEqual(fmt.format(u"{0}{i}", 10, i=100), "10100")
        self.assertEqual(fmt.format(u"{0}{i}{1}", 10, 20, i=100), "1010020")
        self.assertRaises(ValueError, fmt.format, u"{0}{i}{1}", 10, 20, i=100, j=0)
        self.assertRaises(ValueError, fmt.format, u"{0}", 10, 20)
        self.assertRaises(ValueError, fmt.format, u"{0}", 10, 20, i=100)
        self.assertRaises(ValueError, fmt.format, u"{i}", 10, 20, i=100)


def test_main():
    test_support.run_unittest(
                UnicodeTestCase,
                UnicodeIndexMixTest,
                UnicodeFormatTestCase,
                UnicodeStdIOTestCase,
                UnicodeFormatStrTest,
                StringModuleUnicodeTest,
            )


if __name__ == "__main__":
    test_main()