lib-python.2.5.test.test_multibytecodec_support.py Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of jython Show documentation
Show all versions of jython Show documentation
Jython is an implementation of the high-level, dynamic, object-oriented
language Python written in 100% Pure Java, and seamlessly integrated with
the Java platform. It thus allows you to run Python on any Java platform.
#!/usr/bin/env python
#
# test_multibytecodec_support.py
# Common Unittest Routines for CJK codecs
#
import sys, codecs, os.path
import unittest, re
from test import test_support
from StringIO import StringIO
class TestBase:
encoding = '' # codec name
codec = None # codec tuple (with 4 elements)
tstring = '' # string to test StreamReader
codectests = None # must set. codec test tuple
roundtriptest = 1 # set if roundtrip is possible with unicode
has_iso10646 = 0 # set if this encoding contains whole iso10646 map
xmlcharnametest = None # string to test xmlcharrefreplace
unmappedunicode = u'\udeee' # a unicode codepoint that is not mapped.
def setUp(self):
if self.codec is None:
self.codec = codecs.lookup(self.encoding)
self.encode = self.codec.encode
self.decode = self.codec.decode
self.reader = self.codec.streamreader
self.writer = self.codec.streamwriter
self.incrementalencoder = self.codec.incrementalencoder
self.incrementaldecoder = self.codec.incrementaldecoder
def test_chunkcoding(self):
for native, utf8 in zip(*[StringIO(f).readlines()
for f in self.tstring]):
u = self.decode(native)[0]
self.assertEqual(u, utf8.decode('utf-8'))
if self.roundtriptest:
self.assertEqual(native, self.encode(u)[0])
def test_errorhandle(self):
for source, scheme, expected in self.codectests:
if type(source) == type(''):
func = self.decode
else:
func = self.encode
if expected:
result = func(source, scheme)[0]
self.assertEqual(result, expected)
else:
self.assertRaises(UnicodeError, func, source, scheme)
def test_xmlcharrefreplace(self):
if self.has_iso10646:
return
s = u"\u0b13\u0b23\u0b60 nd eggs"
self.assertEqual(
self.encode(s, "xmlcharrefreplace")[0],
"ଓଣୠ nd eggs"
)
def test_customreplace_encode(self):
if self.has_iso10646:
return
from htmlentitydefs import codepoint2name
def xmlcharnamereplace(exc):
if not isinstance(exc, UnicodeEncodeError):
raise TypeError("don't know how to handle %r" % exc)
l = []
for c in exc.object[exc.start:exc.end]:
if ord(c) in codepoint2name:
l.append(u"&%s;" % codepoint2name[ord(c)])
else:
l.append(u"%d;" % ord(c))
return (u"".join(l), exc.end)
codecs.register_error("test.xmlcharnamereplace", xmlcharnamereplace)
if self.xmlcharnametest:
sin, sout = self.xmlcharnametest
else:
sin = u"\xab\u211c\xbb = \u2329\u1234\u232a"
sout = "«ℜ» = 〈ሴ〉"
self.assertEqual(self.encode(sin,
"test.xmlcharnamereplace")[0], sout)
def test_callback_wrong_objects(self):
def myreplace(exc):
return (ret, exc.end)
codecs.register_error("test.cjktest", myreplace)
for ret in ([1, 2, 3], [], None, object(), 'string', ''):
self.assertRaises(TypeError, self.encode, self.unmappedunicode,
'test.cjktest')
def test_callback_long_index(self):
def myreplace(exc):
return (u'x', long(exc.end))
codecs.register_error("test.cjktest", myreplace)
self.assertEqual(self.encode(u'abcd' + self.unmappedunicode + u'efgh',
'test.cjktest'), ('abcdxefgh', 9))
def myreplace(exc):
return (u'x', sys.maxint + 1)
codecs.register_error("test.cjktest", myreplace)
self.assertRaises(IndexError, self.encode, self.unmappedunicode,
'test.cjktest')
def test_callback_None_index(self):
def myreplace(exc):
return (u'x', None)
codecs.register_error("test.cjktest", myreplace)
self.assertRaises(TypeError, self.encode, self.unmappedunicode,
'test.cjktest')
def test_callback_backward_index(self):
def myreplace(exc):
if myreplace.limit > 0:
myreplace.limit -= 1
return (u'REPLACED', 0)
else:
return (u'TERMINAL', exc.end)
myreplace.limit = 3
codecs.register_error("test.cjktest", myreplace)
self.assertEqual(self.encode(u'abcd' + self.unmappedunicode + u'efgh',
'test.cjktest'),
('abcdREPLACEDabcdREPLACEDabcdREPLACEDabcdTERMINALefgh', 9))
def test_callback_forward_index(self):
def myreplace(exc):
return (u'REPLACED', exc.end + 2)
codecs.register_error("test.cjktest", myreplace)
self.assertEqual(self.encode(u'abcd' + self.unmappedunicode + u'efgh',
'test.cjktest'), ('abcdREPLACEDgh', 9))
def test_callback_index_outofbound(self):
def myreplace(exc):
return (u'TERM', 100)
codecs.register_error("test.cjktest", myreplace)
self.assertRaises(IndexError, self.encode, self.unmappedunicode,
'test.cjktest')
def test_incrementalencoder(self):
UTF8Reader = codecs.getreader('utf-8')
for sizehint in [None] + range(1, 33) + \
[64, 128, 256, 512, 1024]:
istream = UTF8Reader(StringIO(self.tstring[1]))
ostream = StringIO()
encoder = self.incrementalencoder()
while 1:
if sizehint is not None:
data = istream.read(sizehint)
else:
data = istream.read()
if not data:
break
e = encoder.encode(data)
ostream.write(e)
self.assertEqual(ostream.getvalue(), self.tstring[0])
def test_incrementaldecoder(self):
UTF8Writer = codecs.getwriter('utf-8')
for sizehint in [None, -1] + range(1, 33) + \
[64, 128, 256, 512, 1024]:
istream = StringIO(self.tstring[0])
ostream = UTF8Writer(StringIO())
decoder = self.incrementaldecoder()
while 1:
data = istream.read(sizehint)
if not data:
break
else:
u = decoder.decode(data)
ostream.write(u)
self.assertEqual(ostream.getvalue(), self.tstring[1])
def test_incrementalencoder_error_callback(self):
inv = self.unmappedunicode
e = self.incrementalencoder()
self.assertRaises(UnicodeEncodeError, e.encode, inv, True)
e.errors = 'ignore'
self.assertEqual(e.encode(inv, True), '')
e.reset()
def tempreplace(exc):
return (u'called', exc.end)
codecs.register_error('test.incremental_error_callback', tempreplace)
e.errors = 'test.incremental_error_callback'
self.assertEqual(e.encode(inv, True), 'called')
# again
e.errors = 'ignore'
self.assertEqual(e.encode(inv, True), '')
def test_streamreader(self):
UTF8Writer = codecs.getwriter('utf-8')
for name in ["read", "readline", "readlines"]:
for sizehint in [None, -1] + range(1, 33) + \
[64, 128, 256, 512, 1024]:
istream = self.reader(StringIO(self.tstring[0]))
ostream = UTF8Writer(StringIO())
func = getattr(istream, name)
while 1:
data = func(sizehint)
if not data:
break
if name == "readlines":
ostream.writelines(data)
else:
ostream.write(data)
self.assertEqual(ostream.getvalue(), self.tstring[1])
def test_streamwriter(self):
readfuncs = ('read', 'readline', 'readlines')
UTF8Reader = codecs.getreader('utf-8')
for name in readfuncs:
for sizehint in [None] + range(1, 33) + \
[64, 128, 256, 512, 1024]:
istream = UTF8Reader(StringIO(self.tstring[1]))
ostream = self.writer(StringIO())
func = getattr(istream, name)
while 1:
if sizehint is not None:
data = func(sizehint)
else:
data = func()
if not data:
break
if name == "readlines":
ostream.writelines(data)
else:
ostream.write(data)
self.assertEqual(ostream.getvalue(), self.tstring[0])
if len(u'\U00012345') == 2: # ucs2 build
_unichr = unichr
def unichr(v):
if v >= 0x10000:
return _unichr(0xd800 + ((v - 0x10000) >> 10)) + \
_unichr(0xdc00 + ((v - 0x10000) & 0x3ff))
else:
return _unichr(v)
_ord = ord
def ord(c):
if len(c) == 2:
return 0x10000 + ((_ord(c[0]) - 0xd800) << 10) + \
(ord(c[1]) - 0xdc00)
else:
return _ord(c)
class TestBase_Mapping(unittest.TestCase):
pass_enctest = []
pass_dectest = []
supmaps = []
def __init__(self, *args, **kw):
unittest.TestCase.__init__(self, *args, **kw)
self.open_mapping_file() # test it to report the error early
def open_mapping_file(self):
return test_support.open_urlresource(self.mapfileurl)
def test_mapping_file(self):
if self.mapfileurl.endswith('.xml'):
self._test_mapping_file_ucm()
else:
self._test_mapping_file_plain()
def _test_mapping_file_plain(self):
unichrs = lambda s: u''.join(map(unichr, map(eval, s.split('+'))))
urt_wa = {}
for line in self.open_mapping_file():
if not line:
break
data = line.split('#')[0].strip().split()
if len(data) != 2:
continue
csetval = eval(data[0])
if csetval <= 0x7F:
csetch = chr(csetval & 0xff)
elif csetval >= 0x1000000:
csetch = chr(csetval >> 24) + chr((csetval >> 16) & 0xff) + \
chr((csetval >> 8) & 0xff) + chr(csetval & 0xff)
elif csetval >= 0x10000:
csetch = chr(csetval >> 16) + \
chr((csetval >> 8) & 0xff) + chr(csetval & 0xff)
elif csetval >= 0x100:
csetch = chr(csetval >> 8) + chr(csetval & 0xff)
else:
continue
unich = unichrs(data[1])
if ord(unich) == 0xfffd or urt_wa.has_key(unich):
continue
urt_wa[unich] = csetch
self._testpoint(csetch, unich)
def _test_mapping_file_ucm(self):
ucmdata = self.open_mapping_file().read()
uc = re.findall('', ucmdata)
for uni, coded in uc:
unich = unichr(int(uni, 16))
codech = ''.join(chr(int(c, 16)) for c in coded.split())
self._testpoint(codech, unich)
def test_mapping_supplemental(self):
for mapping in self.supmaps:
self._testpoint(*mapping)
def _testpoint(self, csetch, unich):
if (csetch, unich) not in self.pass_enctest:
self.assertEqual(unich.encode(self.encoding), csetch)
if (csetch, unich) not in self.pass_dectest:
self.assertEqual(unicode(csetch, self.encoding), unich)
def load_teststring(encoding):
from test import cjkencodings_test
return cjkencodings_test.teststring[encoding]