All downloads are free. The search and download functionality uses the official Maven repository.

Lib.test.test_xml_etree_jy.py Maven / Gradle / Ivy

Go to download

Jython is an implementation of the high-level, dynamic, object-oriented language Python written in 100% Pure Java, and seamlessly integrated with the Java platform. It thus allows you to run Python on any Java platform.

There is a newer version: 2.7.4
Show newest version
# encoding: utf-8

import sys
# True when sys.platform starts with "java" (presumably: running on Jython).
JYTHON = sys.platform.startswith("java")

import doctest

import xml.parsers.expat as expat
from xml.etree.ElementTree import *

def jython(function):
    """Keep *function* only when the module-level JYTHON flag is set.

    Returns *function* unchanged if JYTHON is true, and None otherwise.
    """
    return function if JYTHON else None

class sortdict(dict):
    """dict whose repr() and str() list the items in sorted order.

    This gives the doctests in this module a deterministic rendering of
    dictionaries such as element attributes.
    """

    def __repr__(self):
        # sorted() works on the item list of Python 2 as well as on the item
        # view of Python 3, unlike calling .sort() on the result of items().
        pairs = ["%r: %r" % pair for pair in sorted(self.items())]
        return "{%s}" % ", ".join(pairs)

    __str__ = __repr__


class Outputter:
    """Set of expat callbacks that print the events they receive.

    The bound methods are assigned to the same-named handler attributes of
    an expat parser by the setattr loop in test_utf8, whose expected output
    is the text printed here.
    """
    def StartElementHandler(self, name, attrs):
        # sortdict prints the attributes in sorted order.
        print 'Start element:\n   ', repr(name), sortdict(attrs)

    def EndElementHandler(self, name):
        print 'End element:\n   ', repr(name)

    def CharacterDataHandler(self, data):
        # Text that is empty after stripping whitespace is not reported.
        data = data.strip()
        if data:
            print 'Character data:'
            print '   ', repr(data)

    def ProcessingInstructionHandler(self, target, data):
        print 'PI:\n   ', repr(target), repr(data)

    def StartNamespaceDeclHandler(self, prefix, uri):
        print 'NS decl:\n   ', repr(prefix), repr(uri)

    def EndNamespaceDeclHandler(self, prefix):
        print 'End of NS decl:\n   ', repr(prefix)

    def StartCdataSectionHandler(self):
        print 'Start of CDATA section'

    def EndCdataSectionHandler(self):
        print 'End of CDATA section'

    def CommentHandler(self, text):
        print 'Comment:\n   ', repr(text)

    def NotationDeclHandler(self, *args):
        # The unpacking fails unless exactly four arguments were passed;
        # the unpacked names themselves are not used.
        name, base, sysid, pubid = args
        print 'Notation declared:', args

    def UnparsedEntityDeclHandler(self, *args):
        # The unpacking fails unless exactly five arguments were passed.
        entityName, base, systemId, publicId, notationName = args
        print 'Unparsed entity decl:\n   ', args

    def NotStandaloneHandler(self, userData):
        print 'Not standalone'
        return 1

    def ExternalEntityRefHandler(self, *args):
        # The first argument (context) is left out of the printed tuple.
        context, base, sysId, pubId = args
        print 'External entity ref:', args[1:]
        return 1

    def DefaultHandler(self, userData):
        # Deliberately silent.
        pass

    def DefaultHandlerExpand(self, userData):
        # Deliberately silent.
        pass

# Doctest-shaped fragment bound to `_` instead of being attached to a
# function as a docstring; no use of this value is visible in this file.
# NOTE(review): the continuation lines are almost empty, so the XML markup
# of the sample document was presumably lost from this copy -- confirm
# against the upstream file.
_=  """
    >>> data = '''\
    ... 
    ... 
    ... 
    ... 
    ... 
    ... 
    ... 
    ... 
    ... %unparsed_entity;
    ... ]>
    ...
    ... 
    ... 
    ...      Contents of subelements
    ... 
    ... 
    ... &external_entity;
    ... 
    ... '''
    """

def test_utf8():
    # Doctest: installs every Outputter callback named in HANDLER_NAMES on an
    # expat parser and checks the events printed while parsing `data` with
    # returns_unicode set to 1. The UTF-8 (returns_unicode = 0) variant is
    # present only as commented-out text.
    # NOTE(review): the `data` literal contains almost no markup although the
    # expected output lists a PI, a comment, elements and a CDATA section --
    # the XML was presumably stripped from this copy; confirm against the
    # upstream file.
    """
    Source: test_pyexpat.py
    Changes: replaced tabs with spaces in Outputter to ease doctest integration

    >>> out = Outputter()
    >>> parser = expat.ParserCreate(namespace_separator='!')
    >>> HANDLER_NAMES = [
    ...     'StartElementHandler', 'EndElementHandler',
    ...     'CharacterDataHandler',
    ...     'ProcessingInstructionHandler',
    ...     'UnparsedEntityDeclHandler', 'NotationDeclHandler',
    ...     'StartNamespaceDeclHandler', 'EndNamespaceDeclHandler',
    ...     'CommentHandler', 'StartCdataSectionHandler',
    ...     'EndCdataSectionHandler',
    ...     'DefaultHandler', 'DefaultHandlerExpand',
    ...     #'NotStandaloneHandler',
    ...     'ExternalEntityRefHandler'
    ...     ]
    >>> for name in HANDLER_NAMES:
    ...     setattr(parser, name, getattr(out, name))

    >>> data = '''\\
    ... 
    ... 
    ... 
    ... 
    ... 
    ... 
    ... 
    ... 
    ... %unparsed_entity;
    ... ]>
    ...
    ... 
    ... 
    ...      Contents of subelements
    ... 
    ... 
    ... &external_entity;
    ... 
    ... '''

    #Produce UTF-8 output
    #>>> parser.returns_unicode = 0
    #>>> try:
    #...     parser.Parse(data, 1)
    #... except expat.error:
    #...     print '** Error', parser.ErrorCode, expat.ErrorString(parser.ErrorCode)
    #...     print '** Line', parser.ErrorLineNumber
    #...     print '** Column', parser.ErrorColumnNumber
    #...     print '** Byte', parser.ErrorByteIndex
    #PI:
        #'xml-stylesheet' 'href="stylesheet.css"'
    #Comment:
        #' comment data '
    #Notation declared: ('notation', None, 'notation.jpeg', None)
    #Unparsed entity decl:
        #('unparsed_entity', None, 'entity.file', None, 'notation')
    #Start element:
        #'root' {'attr1': 'value1', 'attr2': 'value2\\xe1\\xbd\\x80'}
    #NS decl:
        #'myns' 'http://www.python.org/namespace'
    #Start element:
        #'http://www.python.org/namespace!subelement' {}
    #Character data:
        #'Contents of subelements'
    #End element:
        #'http://www.python.org/namespace!subelement'
    #End of NS decl:
        #'myns'
    #Start element:
        #'sub2' {}
    #Start of CDATA section
    #Character data:
        #'contents of CDATA section'
    #End of CDATA section
    #End element:
        #'sub2'
    #External entity ref: (None, 'entity.file', None)
    #End element:
        #'root'
    #1

    >>> parser = expat.ParserCreate(namespace_separator='!')
    >>> parser.returns_unicode = 1
    >>> for name in HANDLER_NAMES:
    ...     setattr(parser, name, getattr(out, name))
    >>> try:
    ...     parser.Parse(data, 1)
    ... except expat.error:
    ...     print '** Line', parser.ErrorLineNumber
    ...     print '** Column', parser.ErrorColumnNumber
    ...     print '** Byte', parser.ErrorByteIndex #doctest: +REPORT_UDIFF
    PI:
        u'xml-stylesheet' u'href="stylesheet.css"'
    Comment:
        u' comment data '
    Notation declared: (u'notation', None, u'notation.jpeg', None)
    Unparsed entity decl:
        (u'unparsed_entity', None, u'entity.file', None, u'notation')
    Start element:
        u'root' {u'attr1': u'value1', u'attr2': u'value2\u1f40'}
    NS decl:
        u'myns' u'http://www.python.org/namespace'
    Start element:
        u'http://www.python.org/namespace!subelement' {}
    Character data:
        u'Contents of subelements'
    End element:
        u'http://www.python.org/namespace!subelement'
    End of NS decl:
        u'myns'
    Start element:
        u'sub2' {}
    Start of CDATA section
    Character data:
        u'contents of CDATA section'
    End of CDATA section
    End element:
        u'sub2'
    External entity ref: (None, u'entity.file', None)
    End element:
        u'root'
    1
    """


def test_import_as_pyexpat():
    # Doctest: the parser module must also be importable as `pyexpat`.
    # NOTE(review): the expected output after `>>> expat` is blank although
    # the example carries +ELLIPSIS; the module repr line was presumably
    # lost from this copy -- confirm against the upstream file.
    """
    >>> import pyexpat as expat
    >>> expat #doctest: +ELLIPSIS
    
    """


def test_errors_submodule():
    # Doctest: checks the names and two message strings of expat.errors.
    # NOTE(review): the expected output after `>>> expat.errors` is blank;
    # the module repr line was presumably lost from this copy -- confirm
    # against the upstream file.
    """
    >>> import xml.parsers.expat as expat
    >>> expat.errors
    
    >>> dir(expat.errors) #doctest: +ELLIPSIS
    ['XML_ERROR_ABORTED', ..., 'XML_ERROR_XML_DECL', '__doc__', '__name__']
    >>> expat.errors.XML_ERROR_ABORTED
    'parsing aborted'
    >>> expat.errors.XML_ERROR_XML_DECL
    'XML declaration not well-formed'
    """

def test_model_submodule():
    # Doctest: checks the full contents of expat.model's namespace, printed
    # through sortdict so the order is stable.
    # NOTE(review): the expected output after `>>> expat.model` is blank;
    # the module repr line was presumably lost from this copy -- confirm
    # against the upstream file.
    """
    >>> import xml.parsers.expat as expat
    >>> expat.model
    
    >>> print sortdict(expat.model.__dict__)
    {'XML_CQUANT_NONE': 0, 'XML_CQUANT_OPT': 1, 'XML_CQUANT_PLUS': 3, 'XML_CQUANT_REP': 2, 'XML_CTYPE_ANY': 2, 'XML_CTYPE_CHOICE': 5, 'XML_CTYPE_EMPTY': 1, 'XML_CTYPE_MIXED': 3, 'XML_CTYPE_NAME': 4, 'XML_CTYPE_SEQ': 6, '__doc__': 'Constants used to interpret content model information.', '__name__': 'pyexpat.model'}
    """

def test_parse_only_xml_data():
    # Doctest: parses a long run of character data with a
    # CharacterDataHandler that raises; the bare except swallows whatever
    # Parse raises, so the example only has to finish without output.
    # NOTE(review): the format strings are a bare "%s" with no XML
    # declaration or element around the data, although the notes mention an
    # encoding -- the markup was presumably lost from this copy; confirm
    # against the upstream file.
    """
    Source: test_pyexpat.py, see also: http://python.org/sf/1296433
    Changes:
      - replaced 'iso8859' encoding with 'ISO-8859-1',
      - added isfinal=True keyword argument to Parse call (as in this port,
        the data is not processed until it is fully available).
    With these changes, the test still crashes CPython 2.5.

    >>> import xml.parsers.expat as expat
    >>> # xml = "%s" % ('a' * 1025)

    This one doesn't crash:
    >>> xml = "%s" % ('a' * 10000)

    >>> def handler(text):
    ...     raise Exception
    >>> parser = expat.ParserCreate()
    >>> parser.CharacterDataHandler = handler
    >>> try:
    ...     parser.Parse(xml, True)
    ... except:
    ...     pass
    """


def test_namespace_separator():
    # Doctest: ParserCreate must accept no separator, None, a one-character
    # string and the empty string, raise TypeError for an int and ValueError
    # for a longer string.
    """
    Source: test_pyexpat.py

    Tests that make sure we get errors when the namespace_separator value
    is illegal, and that we don't for good values:

    >>> from xml.parsers.expat import ParserCreate

    >>> p = ParserCreate()
    >>> p = ParserCreate(namespace_separator=None)
    >>> p = ParserCreate(namespace_separator=' ')
    >>> p = ParserCreate(namespace_separator=42) #doctest: +ELLIPSIS
    Traceback (most recent call last):
    ...
    TypeError: ...
    >>> p = ParserCreate(namespace_separator='too long') #doctest: +ELLIPSIS
    Traceback (most recent call last):
    ...
    ValueError: ...

    ParserCreate() needs to accept a namespace_separator of zero length
    to satisfy the requirements of RDF applications that are required
    to simply glue together the namespace URI and the localname.  Though
    considered a wart of the RDF specifications, it needs to be supported.

    See XML-SIG mailing list thread starting with
    http://mail.python.org/pipermail/xml-sig/2001-April/005202.html

    >>> p = ParserCreate(namespace_separator='') # too short
"""


def test_interning_machinery():
    # Doctest: every start/end callback for the same tag must receive the
    # very same (interned) name object.
    # NOTE(review): the document passed to Parse had been reduced to three
    # spaces, which cannot produce any element event. It is rebuilt here as a
    # nest of <e> elements separated by those three spaces, the shape the
    # assertions require (three start and three end events for one tag name)
    # -- confirm against the upstream file.
    """
    Source: test_pyexpat.py

    >>> from xml.parsers.expat import ParserCreate

    >>> p = ParserCreate()
    >>> L = []
    >>> def collector(name, *args):
    ...     L.append(name)
    >>> p.StartElementHandler = collector
    >>> p.EndElementHandler = collector
    >>> p.Parse("<e> <e/> <e></e> </e>", 1)
    1
    >>> tag = L[0]
    >>> len(L)
    6
    >>> all(tag is entry for entry in L)
    True
    """


def test_exception_from_callback():
    # Doctest: an exception raised inside a handler must propagate out of
    # Parse() unchanged; the handler raises RuntimeError(name) for the first
    # start tag, and the test checks that name is "a".
    # NOTE(review): the document passed to Parse had been reduced to an empty
    # string, for which no start-element callback can fire. It is rebuilt
    # here as a small document whose first element is <a>, as the final
    # assertion requires -- confirm against the upstream file.
    """
    Source: test_pyexpat.py

    >>> from xml.parsers.expat import ParserCreate

    >>> def StartElementHandler(name, attrs):
    ...     raise RuntimeError(name)

    >>> parser = ParserCreate()
    >>> parser.StartElementHandler = StartElementHandler
    >>> try:
    ...     parser.Parse("<a><b><c/></b></a>", 1)
    ... except RuntimeError, e:
    ...     pass
    >>> e.args[0] == "a"
    True
    """


def test_with_and_without_namespace():
    # Doctest: parses the same document with and without a namespace
    # separator and prints each start tag with its sorted attributes.
    # NOTE(review): the `xml` literal contains only whitespace although the
    # expected output lists root, python:sub1 and sub2 elements -- the markup
    # was presumably lost from this copy; confirm against the upstream file.
    """
    >>> from xml.parsers.expat import ParserCreate

    >>> xml = '''
    ...            
    ...            
    ...          '''
    >>> def handler(name, attributes):
    ...     attributes = sorted(attributes.items())
    ...     print name
    ...     for attr in attributes:
    ...         print "  %s = %r" % attr

    >>> parser = ParserCreate()
    >>> parser.StartElementHandler = handler
    >>> _ = parser.Parse(xml, True)
    root
      b = u'2'
      python:a = u'1'
      xmlns = u'http://www.python.org'
      xmlns:python = u'http://www.python.org'
    python:sub1
    sub2
      xmlns = u''

    >>> parser = ParserCreate(namespace_separator="|")
    >>> parser.StartElementHandler = handler
    >>> _ = parser.Parse(xml, True)
    http://www.python.org|root
      b = u'2'
      http://www.python.org|a = u'1'
    http://www.python.org|sub1
    sub2
    """

def test_unicode_bug():
    # Doctest: a non-ASCII character must come back as unicode text from
    # ElementTree's XML().
    # NOTE(review): the argument of XML() had been reduced to an empty
    # string, which is not a document at all. It is rebuilt here as a
    # single element holding a character reference to U+8230, the text the
    # expected output shows -- confirm against the upstream file.
    """
    Regression introduced by revision 28

    >>> doc = XML("<doc>&#x8230;</doc>")
    >>> doc.text
    u'\u8230'
    """

def test_DTD():
    # Doctest: installs printing callbacks for the DTD declaration handlers
    # and checks the content models reported for each ELEMENT declaration.
    # NOTE(review): the `xml` literal holds only "]>" and "content" although
    # ten ELEMENT declarations are expected -- the DTD was presumably lost
    # from this copy; confirm against the upstream file.
    """
    >>> xml = '''
    ...          
    ...          
    ...          
    ...          
    ...          
    ...          
    ...          
    ...          
    ...          
    ...          ]>
    ...          content
    ...       '''
    >>> parser = expat.ParserCreate()
    >>> def handler(header, *args):
    ...     def _handler(*args):
    ...         print header + ":", args
    ...     return _handler
    >>> parser.ElementDeclHandler = handler("ELEMENT")
    >>> parser.AttlistDeclHandler = handler("ATTRIBUTE")
    >>> parser.EntityDeclHandler = handler("ENTITY")
    >>> parser.NotationDeclHandler = handler("NOTATION")
    >>> parser.UnparsedEntityDeclHandler = handler("UNPARSED")
    >>> parser.Parse(xml, True)
    ELEMENT: (u'doc', (5, 0, None, ((4, 0, u'any', ()), (4, 0, u'empty', ()), (4, 0, u'text', ()), (4, 0, u'mixed', ()), (4, 0, u'opt', ()), (4, 0, u'many', ()), (4, 0, u'plus', ()))))
    ELEMENT: (u'any', (2, 0, None, ()))
    ELEMENT: (u'empty', (1, 0, None, ()))
    ELEMENT: (u'text', (3, 0, None, ()))
    ELEMENT: (u'sequence', (6, 0, None, ((4, 0, u'_sequence', ()),)))
    ELEMENT: (u'_sequence', (6, 0, None, ((4, 0, u'any', ()), (4, 0, u'any', ()))))
    ELEMENT: (u'mixed', (3, 2, None, ((4, 0, u'any', ()),)))
    ELEMENT: (u'opt', (6, 1, None, ((4, 0, u'empty', ()),)))
    ELEMENT: (u'many', (6, 2, None, ((4, 0, u'empty', ()),)))
    ELEMENT: (u'plus', (6, 3, None, ((4, 0, u'empty', ()),)))
    1
    """

def test_entity():
    # Doctest: parses documents that reference internal and external
    # entities, first with no handlers and then with printing
    # CharacterDataHandler and DefaultHandler callbacks.
    # NOTE(review): both `xml` literals start at "]>" with no DOCTYPE or
    # root element around the entity references -- the markup was presumably
    # lost from this copy; confirm against the upstream file.
    """

    TODO: need a fallback for entity-resolver so that empty source is returned.

    >>> xml = ''' 
    ...           ]>
    ...           &ext-entity;&in-ext-dtd-entity;'''
    >>> parser = expat.ParserCreate()
    >>> parser.Parse(xml, True)
    1

    EXPAT OH MY ! When applicable (internal entities), the CharacterDataHandler
    callback will override DefaultHandlerExpand, but it WON'T override
    DefaultHandler. On the other hand, the DefaultHandlerExpand callback WILL
    override DefaultHandler ... More tests todo here ...

    >>> xml = '''
    ...          
    ...          ]>
    ...           &int-entity;&ext-entity;&in-ext-dtd-entity;'''
    >>> parser = expat.ParserCreate()
    >>> def handler(header):
    ...     def _handler(*args):
    ...         print header + ":", args
    ...         return 1
    ...     return _handler
    >>> parser.CharacterDataHandler = handler("text")
    >>> parser.DefaultHandler = handler("default")
    >>> parser.Parse(xml, True) #doctest: +ELLIPSIS
    default: ...
    default: (u'&int-entity;',)
    default: (u'&ext-entity;',)
    default: (u'&in-ext-dtd-entity;',)
    ...
    1

    EXPAT OH MY ! When applicable (internal entities), the CharacterDataHandler
    callback will override DefaultHandlerExpand, but it WON'T override
    DefaultHandler. On the other hand, the DefaultHandlerExpand callback WILL
    override DefaultHandler ... More tests todo here ...
    """

def test_resolve_entity_handlers():
    # Doctest: checks the arguments passed to ExternalEntityRefHandler when
    # the document references an external entity.
    # NOTE(review): the `xml` literal starts at "]>" with no DOCTYPE or root
    # element -- the markup was presumably lost from this copy; confirm
    # against the upstream file.
    """
    >>> xml = '''
    ...          ]>
    ...          &entity;'''
    >>> def handler(header):
    ...     def _handler(*args):
    ...         print header + ":", args
    ...         return 1
    ...     return _handler

    >>> parser = expat.ParserCreate()
    >>> parser.ExternalEntityRefHandler = handler("ExternalEntityRefHandler")
    >>> parser.Parse(xml, True)
    ExternalEntityRefHandler: (u'entity', None, u'entity', None)
    1
    """

def handler(name, header="XML>", returns=None):
    def _handler(*args):
        if len(args) == 1:
            args = "(%r)" % args[0]
        else:
            args = str(args)
        print header, name + "%s" % args
        return returns
    return _handler

def parse(xml, *handlers):
    """Parse *xml* with a printing callback installed for each named handler.

    Every name in *handlers* is set on a fresh expat parser to a callback
    built by handler(); the ExternalEntityRefHandler callback returns 1,
    all the others return None.
    """
    parser = expat.ParserCreate()
    for name in handlers:
        returns = 1 if name == "ExternalEntityRefHandler" else None
        callback = handler(name, returns=returns)
        setattr(parser, name, callback)
    parser.Parse(xml, True)

def test_internal_entities():
    # Doctest: shows which callback reports an internal entity reference for
    # each combination of CharacterDataHandler, DefaultHandler and
    # DefaultHandlerExpand installed through parse().
    # NOTE(review): the `xml` literal starts at "]>" with no DOCTYPE, entity
    # declaration or root element -- the markup was presumably lost from
    # this copy; confirm against the upstream file.
    """
    >>> xml = '''
    ...          ]>
    ...          &entity;'''

    >>> parse(xml)

    >>> parse(xml, "CharacterDataHandler")
    XML> CharacterDataHandler(u'entity-content')

    >>> parse(xml, "DefaultHandler") #doctest: +ELLIPSIS
    XML> ...DefaultHandler(u'&entity;')...

    >>> parse(xml, "DefaultHandlerExpand") #doctest: +ELLIPSIS
    XML> ...DefaultHandlerExpand(u'entity-content')...

    # Uhu ?
    >>> parse(xml, "CharacterDataHandler",
    ...            "DefaultHandler") #doctest: +ELLIPSIS
    XML> ...DefaultHandler(u'&entity;')...

    >>> parse(xml, "CharacterDataHandler",
    ...            "DefaultHandlerExpand") #doctest: +ELLIPSIS
    XML> ...CharacterDataHandler(u'entity-content')...

    >>> parse(xml, "DefaultHandler",
    ...            "DefaultHandlerExpand") #doctest: +ELLIPSIS
    XML> ...DefaultHandlerExpand(u'entity-content')...

    >>> parse(xml, "CharacterDataHandler",
    ...            "DefaultHandler",
    ...            "DefaultHandlerExpand") #doctest: +ELLIPSIS
    XML> ...CharacterDataHandler(u'entity-content')...
    """

def test_external_entities():
    # Doctest: shows which callback reports an external entity reference for
    # each combination of ExternalEntityRefHandler, DefaultHandler and
    # DefaultHandlerExpand installed through parse().
    # NOTE(review): the `xml` literal starts at "]>" with no DOCTYPE, entity
    # declaration or root element -- the markup was presumably lost from
    # this copy; confirm against the upstream file.
    """
    >>> xml = '''
    ...          ]>
    ...          &entity;'''

    >>> parse(xml)

    >>> parse(xml, "ExternalEntityRefHandler")
    XML> ExternalEntityRefHandler(u'entity', None, u'entity-file', u'http://entity-web')

    >>> parse(xml, "DefaultHandler") #doctest: +ELLIPSIS
    XML> ...DefaultHandler(u'&entity;')...

    >>> parse(xml, "DefaultHandlerExpand") #doctest: +ELLIPSIS
    XML> ...DefaultHandlerExpand(u'&entity;')...

    >>> parse(xml, "ExternalEntityRefHandler",
    ...            "DefaultHandler") #doctest: +ELLIPSIS
    XML> ...ExternalEntityRefHandler(u'entity', None, u'entity-file', u'http://entity-web')...

    >>> parse(xml, "ExternalEntityRefHandler",
    ...            "DefaultHandlerExpand") #doctest: +ELLIPSIS
    XML> ...ExternalEntityRefHandler(u'entity', None, u'entity-file', u'http://entity-web')...

    >>> parse(xml, "DefaultHandler",
    ...            "DefaultHandlerExpand") #doctest: +ELLIPSIS
    XML> ...DefaultHandlerExpand(u'&entity;')...

    >>> parse(xml, "ExternalEntityRefHandler",
    ...            "DefaultHandler",
    ...            "DefaultHandlerExpand") #doctest: +ELLIPSIS
    XML> ...ExternalEntityRefHandler(u'entity', None, u'entity-file', u'http://entity-web')...
    """

def test_undefined_entities():
    # Doctest: a reference to an entity that was never declared must raise
    # ExpatError carrying the position of the reference.
    # NOTE(review): the document had been reduced to the bare reference
    # "&entity;", which cannot sit at column 5. It is rebuilt here with a
    # <doc> root element, whose five-character start tag puts the reference
    # at the expected column -- confirm against the upstream file.
    """
    >>> xml = "<doc>&entity;</doc>"
    >>> parse(xml)
    Traceback (most recent call last):
    ...
    ExpatError: undefined entity: line 1, column 5
    """

def locate(parser, name):
    def _handler(*args):
        print name, parser.CurrentLineNumber, parser.CurrentColumnNumber
    return _handler

def test_current_location():
    # Doctest: prints the line and column reported by the parser for every
    # text, start-tag and end-tag event of two sample documents, using the
    # callbacks built by locate().
    # NOTE(review): the `xml` literals contain only text and blank
    # continuation lines although START/END events are expected -- the tags
    # were presumably lost from this copy; confirm against the upstream file.
    """
    >>> xml = '''texttext
    ... 
    ... text
    ... '''
    >>> parser = expat.ParserCreate()
    >>> parser.CharacterDataHandler = locate(parser, "TEXT:")
    >>> parser.StartElementHandler = locate(parser, "START:")
    >>> parser.EndElementHandler = locate(parser, "END:")
    >>> _ = parser.Parse(xml, True) #doctest: +ELLIPSIS
    START: 1 0
    TEXT: 1 5...
    START: 1 9
    END: 1 15
    TEXT: 1 15...
    START: 1 19
    END: 1 24
    TEXT: 1 30...
    START: 2 0
    END: 2 5
    TEXT: 2 11...
    START: 3 4
    END: 3 10
    TEXT: 3 10...
    END: 4 0

    >>> xml = '''
    ... start tag after some text
    ... 
    ... 
    ... '''
    >>> parser = expat.ParserCreate()
    >>> parser.CharacterDataHandler = locate(parser, "TEXT:")
    >>> parser.StartElementHandler = locate(parser, "START:")
    >>> parser.EndElementHandler = locate(parser, "END:")
    >>> _ = parser.Parse(xml, True) #doctest: +ELLIPSIS
    START: 1 0
    TEXT: 1 5...
    START: 2 25
    END: 2 31
    TEXT: 2 31...
    START: 3 0
    END: 3 5
    START: 3 11
    END: 3 17
    TEXT: 3 17...
    START: 4 0
    END: 4 6
    START: 4 6
    END: 4 12
    TEXT: 4 12...
    END: 5 0
    """


def test_error_location():
    # Doctest: the first part checks the (lineno, offset) carried by
    # ExpatError for several malformed inputs.
    # NOTE(review): this docstring looks like two tests fused together. The
    # error() examples break off mid-line at 'error("text>> # Jython' and are
    # followed by a separate scenario that drives a Xerces SAX reader with an
    # EntityResolver2 through Java imports. The text in between, probably
    # including the end of this function and the header of the next one, was
    # presumably lost from this copy, as was the markup of the `xml` literal
    # -- confirm against the upstream file.
    """
    Source: selftest.py, ElementTree 1.3a3
    Changes: removed dependencies in ElementTree, added one extra test

    >>> def error(xml):
    ...     p = expat.ParserCreate()
    ...     try:
    ...         p.Parse(xml, True)
    ...     except expat.ExpatError, e:
    ...         return e.lineno, e.offset

    >>> error("foo")
    (1, 0)
    >>> error("&foo;")
    (1, 5)
    >>> error("foobar<")
    (1, 6)
    >>> error("text>> # Jython
    >>> from org.python.core.util import StringUtil
    >>> from jarray import array

    >>> # Java Standard Edition
    >>> from org.xml.sax import *
    >>> from org.xml.sax.ext import *
    >>> from org.xml.sax.helpers import *
    >>> from java.io import ByteArrayInputStream

    >>> xml = '''
    ... ]>
    ... &entity;
    ... '''

    >>> def empty_source():
    ...     _source = InputSource()
    ...     byte_stream = ByteArrayInputStream(array([], "b"))
    ...     _source.setByteStream(byte_stream)
    ...     return _source

    >>> class Handler(EntityResolver2):
    ...     def getExternalSubset(self, name, baseURI):
    ...         return None
    ...     def resolveEntity(self, name, publicId, baseURI, systemId):
    ...         print "Entity name:", name
    ...         return empty_source()

    >>> def main():
    ...     sax_parser = "org.apache.xerces.parsers.SAXParser"
    ...     reader = XMLReaderFactory.createXMLReader(sax_parser)
    ...     entity_resolver2 = "http://xml.org/sax/features/use-entity-resolver2"
    ...     enabled = reader.getFeature(entity_resolver2)
    ...     print "Entity-Resolver2 enabled:", enabled
    ...     handler = Handler()
    ...     reader.setEntityResolver(handler)
    ...     bytes = StringUtil.toBytes(xml)
    ...     byte_stream = ByteArrayInputStream(bytes)
    ...     source = InputSource(byte_stream)
    ...     reader.parse(source)

    >>> main()
    Entity-Resolver2 enabled: True
    Entity name: entity
    """

def test_close_files():
    # http://bugs.jython.org/issue1479
    # Doctest: a file written by ElementTree.write() or read by
    # ElementTree.parse() must be removable straight afterwards, i.e. no
    # handle may be left open on it.
    # NOTE(review): both documents had been reduced to empty strings, which
    # XML() and parse() cannot accept. They are rebuilt here as a minimal
    # empty element; any well-formed document would serve -- confirm against
    # the upstream file.
    """
    >>> import os
    >>> from test import test_support
    >>> from xml.etree import ElementTree as ET

    >>> ET.ElementTree(ET.XML('<test/>')).write(test_support.TESTFN)
    >>> os.remove(test_support.TESTFN)

    >>> fp = open(test_support.TESTFN, 'w')
    >>> fp.write('<test/>')
    >>> fp.close()
    >>> tree = ET.parse(test_support.TESTFN)
    >>> os.remove(test_support.TESTFN)
    """

if __name__ == "__main__":
    # When run as a script, execute the doctests found in this module's
    # docstrings.
    doctest.testmod()




© 2015 - 2024 Weber Informatics LLC | Privacy Policy