lib-python.2.7.test.test_xml_etree.py Maven / Gradle / Ivy
Show all versions of jython Show documentation
# xml.etree test. This file contains enough tests to make sure that
# all included components work as they should.
# Large parts are extracted from the upstream test suite.
# IMPORTANT: the same doctests are run from "test_xml_etree_c" in
# order to ensure consistency between the C implementation and the
# Python implementation.
#
# For this purpose, the module-level "ET" symbol is temporarily
# monkey-patched when running the "test_xml_etree_c" test suite.
# Don't re-import "xml.etree.ElementTree" module in the docstring,
# except if the test is specific to the Python implementation.
import sys
import cgi
from test import test_support
from test.test_support import findfile
from xml.etree import ElementTree as ET
SIMPLE_XMLFILE = findfile("simple.xml", subdir="xmltestdata")
SIMPLE_NS_XMLFILE = findfile("simple-ns.xml", subdir="xmltestdata")
SAMPLE_XML = """\
text
subtext
"""
SAMPLE_SECTION = """\
subtext
"""
SAMPLE_XML_NS = """
text
subtext
"""
def sanity():
"""
Import sanity.
>>> from xml.etree import ElementTree
>>> from xml.etree import ElementInclude
>>> from xml.etree import ElementPath
"""
def check_method(method):
if not hasattr(method, '__call__'):
print method, "not callable"
def serialize(elem, to_string=True, **options):
import StringIO
file = StringIO.StringIO()
tree = ET.ElementTree(elem)
tree.write(file, **options)
if to_string:
return file.getvalue()
else:
file.seek(0)
return file
def summarize(elem):
if elem.tag == ET.Comment:
return ""
return elem.tag
def summarize_list(seq):
return [summarize(elem) for elem in seq]
def normalize_crlf(tree):
for elem in tree.iter():
if elem.text:
elem.text = elem.text.replace("\r\n", "\n")
if elem.tail:
elem.tail = elem.tail.replace("\r\n", "\n")
def check_string(string):
len(string)
for char in string:
if len(char) != 1:
print "expected one-character string, got %r" % char
new_string = string + ""
new_string = string + " "
string[:0]
def check_mapping(mapping):
len(mapping)
keys = mapping.keys()
items = mapping.items()
for key in keys:
item = mapping[key]
mapping["key"] = "value"
if mapping["key"] != "value":
print "expected value string, got %r" % mapping["key"]
def check_element(element):
if not ET.iselement(element):
print "not an element"
if not hasattr(element, "tag"):
print "no tag member"
if not hasattr(element, "attrib"):
print "no attrib member"
if not hasattr(element, "text"):
print "no text member"
if not hasattr(element, "tail"):
print "no tail member"
check_string(element.tag)
check_mapping(element.attrib)
if element.text is not None:
check_string(element.text)
if element.tail is not None:
check_string(element.tail)
for elem in element:
check_element(elem)
# --------------------------------------------------------------------
# element tree tests
def interface():
r"""
Test element tree interface.
>>> element = ET.Element("tag")
>>> check_element(element)
>>> tree = ET.ElementTree(element)
>>> check_element(tree.getroot())
>>> element = ET.Element("t\xe4g", key="value")
>>> tree = ET.ElementTree(element)
>>> repr(element) # doctest: +ELLIPSIS
""
>>> element = ET.Element("tag", key="value")
Make sure all standard element methods exist.
>>> check_method(element.append)
>>> check_method(element.extend)
>>> check_method(element.insert)
>>> check_method(element.remove)
>>> check_method(element.getchildren)
>>> check_method(element.find)
>>> check_method(element.iterfind)
>>> check_method(element.findall)
>>> check_method(element.findtext)
>>> check_method(element.clear)
>>> check_method(element.get)
>>> check_method(element.set)
>>> check_method(element.keys)
>>> check_method(element.items)
>>> check_method(element.iter)
>>> check_method(element.itertext)
>>> check_method(element.getiterator)
These methods return an iterable. See bug 6472.
>>> check_method(element.iter("tag").next)
>>> check_method(element.iterfind("tag").next)
>>> check_method(element.iterfind("*").next)
>>> check_method(tree.iter("tag").next)
>>> check_method(tree.iterfind("tag").next)
>>> check_method(tree.iterfind("*").next)
These aliases are provided:
>>> assert ET.XML == ET.fromstring
>>> assert ET.PI == ET.ProcessingInstruction
>>> assert ET.XMLParser == ET.XMLTreeBuilder
"""
def simpleops():
"""
Basic method sanity checks.
>>> elem = ET.XML(" ")
>>> serialize(elem)
' '
>>> e = ET.Element("tag2")
>>> elem.append(e)
>>> serialize(elem)
' '
>>> elem.remove(e)
>>> serialize(elem)
' '
>>> elem.insert(0, e)
>>> serialize(elem)
' '
>>> elem.remove(e)
>>> elem.extend([e])
>>> serialize(elem)
' '
>>> elem.remove(e)
>>> element = ET.Element("tag", key="value")
>>> serialize(element) # 1
' '
>>> subelement = ET.Element("subtag")
>>> element.append(subelement)
>>> serialize(element) # 2
' '
>>> element.insert(0, subelement)
>>> serialize(element) # 3
' '
>>> element.remove(subelement)
>>> serialize(element) # 4
' '
>>> element.remove(subelement)
>>> serialize(element) # 5
' '
>>> element.remove(subelement)
Traceback (most recent call last):
ValueError: list.remove(x): x not in list
>>> serialize(element) # 6
' '
>>> element[0:0] = [subelement, subelement, subelement]
>>> serialize(element[1])
' '
>>> element[1:9] == [element[1], element[2]]
True
>>> element[:9:2] == [element[0], element[2]]
True
>>> del element[1:2]
>>> serialize(element)
' '
"""
def cdata():
"""
Test CDATA handling (etc).
>>> serialize(ET.XML("hello "))
'hello '
>>> serialize(ET.XML("hello "))
'hello '
>>> serialize(ET.XML(" "))
'hello '
"""
# Only with Python implementation
def simplefind():
"""
Test find methods using the elementpath fallback.
>>> from xml.etree import ElementTree
>>> CurrentElementPath = ElementTree.ElementPath
>>> ElementTree.ElementPath = ElementTree._SimpleElementPath()
>>> elem = ElementTree.XML(SAMPLE_XML)
>>> elem.find("tag").tag
'tag'
>>> ElementTree.ElementTree(elem).find("tag").tag
'tag'
>>> elem.findtext("tag")
'text'
>>> elem.findtext("tog")
>>> elem.findtext("tog", "default")
'default'
>>> ElementTree.ElementTree(elem).findtext("tag")
'text'
>>> summarize_list(elem.findall("tag"))
['tag', 'tag']
>>> summarize_list(elem.findall(".//tag"))
['tag', 'tag', 'tag']
Path syntax doesn't work in this case.
>>> elem.find("section/tag")
>>> elem.findtext("section/tag")
>>> summarize_list(elem.findall("section/tag"))
[]
>>> ElementTree.ElementPath = CurrentElementPath
"""
def find():
"""
Test find methods (including xpath syntax).
>>> elem = ET.XML(SAMPLE_XML)
>>> elem.find("tag").tag
'tag'
>>> ET.ElementTree(elem).find("tag").tag
'tag'
>>> elem.find("section/tag").tag
'tag'
>>> elem.find("./tag").tag
'tag'
>>> ET.ElementTree(elem).find("./tag").tag
'tag'
>>> ET.ElementTree(elem).find("/tag").tag
'tag'
>>> elem[2] = ET.XML(SAMPLE_SECTION)
>>> elem.find("section/nexttag").tag
'nexttag'
>>> ET.ElementTree(elem).find("section/tag").tag
'tag'
>>> ET.ElementTree(elem).find("tog")
>>> ET.ElementTree(elem).find("tog/foo")
>>> elem.findtext("tag")
'text'
>>> elem.findtext("section/nexttag")
''
>>> elem.findtext("section/nexttag", "default")
''
>>> elem.findtext("tog")
>>> elem.findtext("tog", "default")
'default'
>>> ET.ElementTree(elem).findtext("tag")
'text'
>>> ET.ElementTree(elem).findtext("tog/foo")
>>> ET.ElementTree(elem).findtext("tog/foo", "default")
'default'
>>> ET.ElementTree(elem).findtext("./tag")
'text'
>>> ET.ElementTree(elem).findtext("/tag")
'text'
>>> elem.findtext("section/tag")
'subtext'
>>> ET.ElementTree(elem).findtext("section/tag")
'subtext'
>>> summarize_list(elem.findall("."))
['body']
>>> summarize_list(elem.findall("tag"))
['tag', 'tag']
>>> summarize_list(elem.findall("tog"))
[]
>>> summarize_list(elem.findall("tog/foo"))
[]
>>> summarize_list(elem.findall("*"))
['tag', 'tag', 'section']
>>> summarize_list(elem.findall(".//tag"))
['tag', 'tag', 'tag', 'tag']
>>> summarize_list(elem.findall("section/tag"))
['tag']
>>> summarize_list(elem.findall("section//tag"))
['tag', 'tag']
>>> summarize_list(elem.findall("section/*"))
['tag', 'nexttag', 'nextsection']
>>> summarize_list(elem.findall("section//*"))
['tag', 'nexttag', 'nextsection', 'tag']
>>> summarize_list(elem.findall("section/.//*"))
['tag', 'nexttag', 'nextsection', 'tag']
>>> summarize_list(elem.findall("*/*"))
['tag', 'nexttag', 'nextsection']
>>> summarize_list(elem.findall("*//*"))
['tag', 'nexttag', 'nextsection', 'tag']
>>> summarize_list(elem.findall("*/tag"))
['tag']
>>> summarize_list(elem.findall("*/./tag"))
['tag']
>>> summarize_list(elem.findall("./tag"))
['tag', 'tag']
>>> summarize_list(elem.findall(".//tag"))
['tag', 'tag', 'tag', 'tag']
>>> summarize_list(elem.findall("././tag"))
['tag', 'tag']
>>> summarize_list(elem.findall(".//tag[@class]"))
['tag', 'tag', 'tag']
>>> summarize_list(elem.findall(".//tag[@class='a']"))
['tag']
>>> summarize_list(elem.findall(".//tag[@class='b']"))
['tag', 'tag']
>>> summarize_list(elem.findall(".//tag[@id]"))
['tag']
>>> summarize_list(elem.findall(".//section[tag]"))
['section']
>>> summarize_list(elem.findall(".//section[element]"))
[]
>>> summarize_list(elem.findall("../tag"))
[]
>>> summarize_list(elem.findall("section/../tag"))
['tag', 'tag']
>>> summarize_list(ET.ElementTree(elem).findall("./tag"))
['tag', 'tag']
Following example is invalid in 1.2.
A leading '*' is assumed in 1.3.
>>> elem.findall("section//") == elem.findall("section//*")
True
ET's Path module handles this case incorrectly; this gives
a warning in 1.3, and the behaviour will be modified in 1.4.
>>> summarize_list(ET.ElementTree(elem).findall("/tag"))
['tag', 'tag']
>>> elem = ET.XML(SAMPLE_XML_NS)
>>> summarize_list(elem.findall("tag"))
[]
>>> summarize_list(elem.findall("{http://effbot.org/ns}tag"))
['{http://effbot.org/ns}tag', '{http://effbot.org/ns}tag']
>>> summarize_list(elem.findall(".//{http://effbot.org/ns}tag"))
['{http://effbot.org/ns}tag', '{http://effbot.org/ns}tag', '{http://effbot.org/ns}tag']
"""
def file_init():
"""
>>> import StringIO
>>> stringfile = StringIO.StringIO(SAMPLE_XML)
>>> tree = ET.ElementTree(file=stringfile)
>>> tree.find("tag").tag
'tag'
>>> tree.find("section/tag").tag
'tag'
>>> tree = ET.ElementTree(file=SIMPLE_XMLFILE)
>>> tree.find("element").tag
'element'
>>> tree.find("element/../empty-element").tag
'empty-element'
"""
def bad_find():
"""
Check bad or unsupported path expressions.
>>> elem = ET.XML(SAMPLE_XML)
>>> elem.findall("/tag")
Traceback (most recent call last):
SyntaxError: cannot use absolute path on element
"""
def path_cache():
"""
Check that the path cache behaves sanely.
>>> elem = ET.XML(SAMPLE_XML)
>>> for i in range(10): ET.ElementTree(elem).find('./'+str(i))
>>> cache_len_10 = len(ET.ElementPath._cache)
>>> for i in range(10): ET.ElementTree(elem).find('./'+str(i))
>>> len(ET.ElementPath._cache) == cache_len_10
True
>>> for i in range(20): ET.ElementTree(elem).find('./'+str(i))
>>> len(ET.ElementPath._cache) > cache_len_10
True
>>> for i in range(600): ET.ElementTree(elem).find('./'+str(i))
>>> len(ET.ElementPath._cache) < 500
True
"""
def copy():
"""
Test copy handling (etc).
>>> import copy
>>> e1 = ET.XML("hello ")
>>> e2 = copy.copy(e1)
>>> e3 = copy.deepcopy(e1)
>>> e1.find("foo").tag = "bar"
>>> serialize(e1)
'hello '
>>> serialize(e2)
'hello '
>>> serialize(e3)
'hello '
"""
def attrib():
"""
Test attribute handling.
>>> elem = ET.Element("tag")
>>> elem.get("key") # 1.1
>>> elem.get("key", "default") # 1.2
'default'
>>> elem.set("key", "value")
>>> elem.get("key") # 1.3
'value'
>>> elem = ET.Element("tag", key="value")
>>> elem.get("key") # 2.1
'value'
>>> elem.attrib # 2.2
{'key': 'value'}
>>> attrib = {"key": "value"}
>>> elem = ET.Element("tag", attrib)
>>> attrib.clear() # check for aliasing issues
>>> elem.get("key") # 3.1
'value'
>>> elem.attrib # 3.2
{'key': 'value'}
>>> attrib = {"key": "value"}
>>> elem = ET.Element("tag", **attrib)
>>> attrib.clear() # check for aliasing issues
>>> elem.get("key") # 4.1
'value'
>>> elem.attrib # 4.2
{'key': 'value'}
>>> elem = ET.Element("tag", {"key": "other"}, key="value")
>>> elem.get("key") # 5.1
'value'
>>> elem.attrib # 5.2
{'key': 'value'}
>>> elem = ET.Element('test')
>>> elem.text = "aa"
>>> elem.set('testa', 'testval')
>>> elem.set('testb', 'test2')
>>> ET.tostring(elem)
'aa '
>>> sorted(elem.keys())
['testa', 'testb']
>>> sorted(elem.items())
[('testa', 'testval'), ('testb', 'test2')]
>>> elem.attrib['testb']
'test2'
>>> elem.attrib['testb'] = 'test1'
>>> elem.attrib['testc'] = 'test2'
>>> ET.tostring(elem)
'aa '
"""
def makeelement():
"""
Test makeelement handling.
>>> elem = ET.Element("tag")
>>> attrib = {"key": "value"}
>>> subelem = elem.makeelement("subtag", attrib)
>>> if subelem.attrib is attrib:
... print "attrib aliasing"
>>> elem.append(subelem)
>>> serialize(elem)
' '
>>> elem.clear()
>>> serialize(elem)
' '
>>> elem.append(subelem)
>>> serialize(elem)
' '
>>> elem.extend([subelem, subelem])
>>> serialize(elem)
' '
>>> elem[:] = [subelem]
>>> serialize(elem)
' '
>>> elem[:] = tuple([subelem])
>>> serialize(elem)
' '
"""
def parsefile():
"""
Test parsing from file.
>>> tree = ET.parse(SIMPLE_XMLFILE)
>>> normalize_crlf(tree)
>>> tree.write(sys.stdout)
text
text tail
>>> tree = ET.parse(SIMPLE_NS_XMLFILE)
>>> normalize_crlf(tree)
>>> tree.write(sys.stdout)
text
text tail
>>> with open(SIMPLE_XMLFILE) as f:
... data = f.read()
>>> parser = ET.XMLParser()
>>> parser.version # doctest: +ELLIPSIS
'Expat ...'
>>> parser.feed(data)
>>> print serialize(parser.close())
text
text tail
>>> parser = ET.XMLTreeBuilder() # 1.2 compatibility
>>> parser.feed(data)
>>> print serialize(parser.close())
text
text tail
>>> target = ET.TreeBuilder()
>>> parser = ET.XMLParser(target=target)
>>> parser.feed(data)
>>> print serialize(parser.close())
text
text tail
"""
def parseliteral():
"""
>>> element = ET.XML("text")
>>> ET.ElementTree(element).write(sys.stdout)
text
>>> element = ET.fromstring("text")
>>> ET.ElementTree(element).write(sys.stdout)
text
>>> sequence = ["", "text"]
>>> element = ET.fromstringlist(sequence)
>>> print ET.tostring(element)
text
>>> print "".join(ET.tostringlist(element))
text
>>> ET.tostring(element, "ascii")
"\\ntext"
>>> _, ids = ET.XMLID("text")
>>> len(ids)
0
>>> _, ids = ET.XMLID("text")
>>> len(ids)
1
>>> ids["body"].tag
'body'
"""
def iterparse():
"""
Test iterparse interface.
>>> iterparse = ET.iterparse
>>> context = iterparse(SIMPLE_XMLFILE)
>>> action, elem = next(context)
>>> print action, elem.tag
end element
>>> for action, elem in context:
... print action, elem.tag
end element
end empty-element
end root
>>> context.root.tag
'root'
>>> context = iterparse(SIMPLE_NS_XMLFILE)
>>> for action, elem in context:
... print action, elem.tag
end {namespace}element
end {namespace}element
end {namespace}empty-element
end {namespace}root
>>> events = ()
>>> context = iterparse(SIMPLE_XMLFILE, events)
>>> for action, elem in context:
... print action, elem.tag
>>> events = ()
>>> context = iterparse(SIMPLE_XMLFILE, events=events)
>>> for action, elem in context:
... print action, elem.tag
>>> events = ("start", "end")
>>> context = iterparse(SIMPLE_XMLFILE, events)
>>> for action, elem in context:
... print action, elem.tag
start root
start element
end element
start element
end element
start empty-element
end empty-element
end root
>>> events = ("start", "end", "start-ns", "end-ns")
>>> context = iterparse(SIMPLE_NS_XMLFILE, events)
>>> for action, elem in context:
... if action in ("start", "end"):
... print action, elem.tag
... else:
... print action, elem
start-ns ('', 'namespace')
start {namespace}root
start {namespace}element
end {namespace}element
start {namespace}element
end {namespace}element
start {namespace}empty-element
end {namespace}empty-element
end {namespace}root
end-ns None
>>> events = ("start", "end", "bogus")
>>> with open(SIMPLE_XMLFILE, "rb") as f:
... iterparse(f, events)
Traceback (most recent call last):
ValueError: unknown event 'bogus'
>>> import StringIO
>>> source = StringIO.StringIO(
... "\\n"
... "text\\n")
>>> events = ("start-ns",)
>>> context = iterparse(source, events)
>>> for action, elem in context:
... print action, elem
start-ns ('', u'http://\\xe9ffbot.org/ns')
start-ns (u'cl\\xe9', 'http://effbot.org/ns')
>>> source = StringIO.StringIO(" junk")
>>> try:
... for action, elem in iterparse(source):
... print action, elem.tag
... except ET.ParseError, v:
... print v
end document
junk after document element: line 1, column 12
"""
def writefile():
"""
>>> elem = ET.Element("tag")
>>> elem.text = "text"
>>> serialize(elem)
'text '
>>> ET.SubElement(elem, "subtag").text = "subtext"
>>> serialize(elem)
'textsubtext '
Test tag suppression
>>> elem.tag = None
>>> serialize(elem)
'textsubtext '
>>> elem.insert(0, ET.Comment("comment"))
>>> serialize(elem) # assumes 1.3
'textsubtext '
>>> elem[0] = ET.PI("key", "value")
>>> serialize(elem)
'textsubtext '
"""
def custom_builder():
"""
Test parser w. custom builder.
>>> with open(SIMPLE_XMLFILE) as f:
... data = f.read()
>>> class Builder:
... def start(self, tag, attrib):
... print "start", tag
... def end(self, tag):
... print "end", tag
... def data(self, text):
... pass
>>> builder = Builder()
>>> parser = ET.XMLParser(target=builder)
>>> parser.feed(data)
start root
start element
end element
start element
end element
start empty-element
end empty-element
end root
>>> with open(SIMPLE_NS_XMLFILE) as f:
... data = f.read()
>>> class Builder:
... def start(self, tag, attrib):
... print "start", tag
... def end(self, tag):
... print "end", tag
... def data(self, text):
... pass
... def pi(self, target, data):
... print "pi", target, repr(data)
... def comment(self, data):
... print "comment", repr(data)
>>> builder = Builder()
>>> parser = ET.XMLParser(target=builder)
>>> parser.feed(data)
pi pi 'data'
comment ' comment '
start {namespace}root
start {namespace}element
end {namespace}element
start {namespace}element
end {namespace}element
start {namespace}empty-element
end {namespace}empty-element
end {namespace}root
"""
def getchildren():
"""
Test Element.getchildren()
>>> with open(SIMPLE_XMLFILE, "r") as f:
... tree = ET.parse(f)
>>> for elem in tree.getroot().iter():
... summarize_list(elem.getchildren())
['element', 'element', 'empty-element']
[]
[]
[]
>>> for elem in tree.getiterator():
... summarize_list(elem.getchildren())
['element', 'element', 'empty-element']
[]
[]
[]
>>> elem = ET.XML(SAMPLE_XML)
>>> len(elem.getchildren())
3
>>> len(elem[2].getchildren())
1
>>> elem[:] == elem.getchildren()
True
>>> child1 = elem[0]
>>> child2 = elem[2]
>>> del elem[1:2]
>>> len(elem.getchildren())
2
>>> child1 == elem[0]
True
>>> child2 == elem[1]
True
>>> elem[0:2] = [child2, child1]
>>> child2 == elem[0]
True
>>> child1 == elem[1]
True
>>> child1 == elem[0]
False
>>> elem.clear()
>>> elem.getchildren()
[]
"""
def writestring():
"""
>>> elem = ET.XML("text")
>>> ET.tostring(elem)
'text'
>>> elem = ET.fromstring("text")
>>> ET.tostring(elem)
'text'
"""
def check_encoding(encoding):
"""
>>> check_encoding("ascii")
>>> check_encoding("us-ascii")
>>> check_encoding("iso-8859-1")
>>> check_encoding("iso-8859-15")
>>> check_encoding("cp437")
>>> check_encoding("mac-roman")
"""
ET.XML(" " % encoding)
def encoding():
r"""
Test encoding issues.
>>> elem = ET.Element("tag")
>>> elem.text = u"abc"
>>> serialize(elem)
'abc '
>>> serialize(elem, encoding="utf-8")
'abc '
>>> serialize(elem, encoding="us-ascii")
'abc '
>>> serialize(elem, encoding="iso-8859-1")
"\nabc "
>>> elem.text = "<&\"\'>"
>>> serialize(elem)
'<&"\'> '
>>> serialize(elem, encoding="utf-8")
'<&"\'> '
>>> serialize(elem, encoding="us-ascii") # cdata characters
'<&"\'> '
>>> serialize(elem, encoding="iso-8859-1")
'\n<&"\'> '
>>> elem.attrib["key"] = "<&\"\'>"
>>> elem.text = None
>>> serialize(elem)
' '
>>> serialize(elem, encoding="utf-8")
' '
>>> serialize(elem, encoding="us-ascii")
' '
>>> serialize(elem, encoding="iso-8859-1")
'\n '
>>> elem.text = u'\xe5\xf6\xf6<>'
>>> elem.attrib.clear()
>>> serialize(elem)
'åöö<> '
>>> serialize(elem, encoding="utf-8")
'\xc3\xa5\xc3\xb6\xc3\xb6<> '
>>> serialize(elem, encoding="us-ascii")
'åöö<> '
>>> serialize(elem, encoding="iso-8859-1")
"\n\xe5\xf6\xf6<> "
>>> elem.attrib["key"] = u'\xe5\xf6\xf6<>'
>>> elem.text = None
>>> serialize(elem)
' '
>>> serialize(elem, encoding="utf-8")
' '
>>> serialize(elem, encoding="us-ascii")
' '
>>> serialize(elem, encoding="iso-8859-1")
'\n '
"""
def methods():
r"""
Test serialization methods.
>>> e = ET.XML("")
>>> e.tail = "\n"
>>> serialize(e)
'\n'
>>> serialize(e, method=None)
'\n'
>>> serialize(e, method="xml")
'\n'
>>> serialize(e, method="html")
'\n'
>>> serialize(e, method="text")
'1 < 2\n'
"""
def iterators():
"""
Test iterators.
>>> e = ET.XML("this is a paragraph...")
>>> summarize_list(e.iter())
['html', 'body', 'i']
>>> summarize_list(e.find("body").iter())
['body', 'i']
>>> summarize(next(e.iter()))
'html'
>>> "".join(e.itertext())
'this is a paragraph...'
>>> "".join(e.find("body").itertext())
'this is a paragraph.'
>>> next(e.itertext())
'this is a '
Method iterparse should return an iterator. See bug 6472.
>>> sourcefile = serialize(e, to_string=False)
>>> next(ET.iterparse(sourcefile)) # doctest: +ELLIPSIS
('end', )
>>> tree = ET.ElementTree(None)
>>> tree.iter()
Traceback (most recent call last):
AttributeError: 'NoneType' object has no attribute 'iter'
"""
ENTITY_XML = """\
%user-entities;
]>
&entity;
"""
def entity():
"""
Test entity handling.
1) good entities
>>> e = ET.XML("test ")
>>> serialize(e)
'test '
2) bad entities
>>> ET.XML("&entity; ")
Traceback (most recent call last):
ParseError: undefined entity: line 1, column 10
>>> ET.XML(ENTITY_XML)
Traceback (most recent call last):
ParseError: undefined entity &entity;: line 5, column 10
3) custom entity
>>> parser = ET.XMLParser()
>>> parser.entity["entity"] = "text"
>>> parser.feed(ENTITY_XML)
>>> root = parser.close()
>>> serialize(root)
'text '
"""
def error(xml):
"""
Test error handling.
>>> issubclass(ET.ParseError, SyntaxError)
True
>>> error("foo").position
(1, 0)
>>> error("&foo; ").position
(1, 5)
>>> error("foobar<").position
(1, 6)
"""
try:
ET.XML(xml)
except ET.ParseError:
return sys.exc_value
def namespace():
"""
Test namespace issues.
1) xml namespace
>>> elem = ET.XML(" ")
>>> serialize(elem) # 1.1
' '
2) other "well-known" namespaces
>>> elem = ET.XML(" ")
>>> serialize(elem) # 2.1
' '
>>> elem = ET.XML(" ")
>>> serialize(elem) # 2.2
' '
>>> elem = ET.XML(" ")
>>> serialize(elem) # 2.3
' '
3) unknown namespaces
>>> elem = ET.XML(SAMPLE_XML_NS)
>>> print serialize(elem)
text
subtext
"""
def qname():
"""
Test QName handling.
1) decorated tags
>>> elem = ET.Element("{uri}tag")
>>> serialize(elem) # 1.1
' '
>>> elem = ET.Element(ET.QName("{uri}tag"))
>>> serialize(elem) # 1.2
' '
>>> elem = ET.Element(ET.QName("uri", "tag"))
>>> serialize(elem) # 1.3
' '
>>> elem = ET.Element(ET.QName("uri", "tag"))
>>> subelem = ET.SubElement(elem, ET.QName("uri", "tag1"))
>>> subelem = ET.SubElement(elem, ET.QName("uri", "tag2"))
>>> serialize(elem) # 1.4
' '
2) decorated attributes
>>> elem.clear()
>>> elem.attrib["{uri}key"] = "value"
>>> serialize(elem) # 2.1
' '
>>> elem.clear()
>>> elem.attrib[ET.QName("{uri}key")] = "value"
>>> serialize(elem) # 2.2
' '
3) decorated values are not converted by default, but the
QName wrapper can be used for values
>>> elem.clear()
>>> elem.attrib["{uri}key"] = "{uri}value"
>>> serialize(elem) # 3.1
' '
>>> elem.clear()
>>> elem.attrib["{uri}key"] = ET.QName("{uri}value")
>>> serialize(elem) # 3.2
' '
>>> elem.clear()
>>> subelem = ET.Element("tag")
>>> subelem.attrib["{uri1}key"] = ET.QName("{uri2}value")
>>> elem.append(subelem)
>>> elem.append(subelem)
>>> serialize(elem) # 3.3
' '
4) Direct QName tests
>>> str(ET.QName('ns', 'tag'))
'{ns}tag'
>>> str(ET.QName('{ns}tag'))
'{ns}tag'
>>> q1 = ET.QName('ns', 'tag')
>>> q2 = ET.QName('ns', 'tag')
>>> q1 == q2
True
>>> q2 = ET.QName('ns', 'other-tag')
>>> q1 == q2
False
>>> q1 == 'ns:tag'
False
>>> q1 == '{ns}tag'
True
"""
def doctype_public():
"""
Test PUBLIC doctype.
>>> elem = ET.XML(''
... 'text')
"""
def xpath_tokenizer(p):
"""
Test the XPath tokenizer.
>>> # tests from the xml specification
>>> xpath_tokenizer("*")
['*']
>>> xpath_tokenizer("text()")
['text', '()']
>>> xpath_tokenizer("@name")
['@', 'name']
>>> xpath_tokenizer("@*")
['@', '*']
>>> xpath_tokenizer("para[1]")
['para', '[', '1', ']']
>>> xpath_tokenizer("para[last()]")
['para', '[', 'last', '()', ']']
>>> xpath_tokenizer("*/para")
['*', '/', 'para']
>>> xpath_tokenizer("/doc/chapter[5]/section[2]")
['/', 'doc', '/', 'chapter', '[', '5', ']', '/', 'section', '[', '2', ']']
>>> xpath_tokenizer("chapter//para")
['chapter', '//', 'para']
>>> xpath_tokenizer("//para")
['//', 'para']
>>> xpath_tokenizer("//olist/item")
['//', 'olist', '/', 'item']
>>> xpath_tokenizer(".")
['.']
>>> xpath_tokenizer(".//para")
['.', '//', 'para']
>>> xpath_tokenizer("..")
['..']
>>> xpath_tokenizer("../@lang")
['..', '/', '@', 'lang']
>>> xpath_tokenizer("chapter[title]")
['chapter', '[', 'title', ']']
>>> xpath_tokenizer("employee[@secretary and @assistant]")
['employee', '[', '@', 'secretary', '', 'and', '', '@', 'assistant', ']']
>>> # additional tests
>>> xpath_tokenizer("{http://spam}egg")
['{http://spam}egg']
>>> xpath_tokenizer("./spam.egg")
['.', '/', 'spam.egg']
>>> xpath_tokenizer(".//{http://spam}egg")
['.', '//', '{http://spam}egg']
"""
from xml.etree import ElementPath
out = []
for op, tag in ElementPath.xpath_tokenizer(p):
out.append(op or tag)
return out
def processinginstruction():
"""
Test ProcessingInstruction directly
>>> ET.tostring(ET.ProcessingInstruction('test', 'instruction'))
''
>>> ET.tostring(ET.PI('test', 'instruction'))
''
Issue #2746
>>> ET.tostring(ET.PI('test', ''))
'?>'
>>> ET.tostring(ET.PI('test', u'\xe3'), 'latin1')
"\\n\\xe3?>"
"""
#
# xinclude tests (samples from appendix C of the xinclude specification)
XINCLUDE = {}
XINCLUDE["C1.xml"] = """\
120 Mz is adequate for an average home user.
"""
XINCLUDE["disclaimer.xml"] = """\
The opinions represented herein represent those of the individual
and should not be interpreted as official policy endorsed by this
organization.
"""
XINCLUDE["C2.xml"] = """\
This document has been accessed
times.
"""
XINCLUDE["count.txt"] = "324387"
XINCLUDE["C2b.xml"] = """\
This document has been accessed
times.
"""
XINCLUDE["C3.xml"] = """\
The following is the source of the "data.xml" resource:
"""
XINCLUDE["data.xml"] = """\
"""
XINCLUDE["C5.xml"] = """\
"""
XINCLUDE["default.xml"] = """\
Example.
""".format(cgi.escape(SIMPLE_XMLFILE, True))
def xinclude_loader(href, parse="xml", encoding=None):
try:
data = XINCLUDE[href]
except KeyError:
raise IOError("resource not found")
if parse == "xml":
from xml.etree.ElementTree import XML
return XML(data)
return data
def xinclude():
r"""
Basic inclusion example (XInclude C.1)
>>> from xml.etree import ElementTree as ET
>>> from xml.etree import ElementInclude
>>> document = xinclude_loader("C1.xml")
>>> ElementInclude.include(document, xinclude_loader)
>>> print serialize(document) # C1
120 Mz is adequate for an average home user.
The opinions represented herein represent those of the individual
and should not be interpreted as official policy endorsed by this
organization.
Textual inclusion example (XInclude C.2)
>>> document = xinclude_loader("C2.xml")
>>> ElementInclude.include(document, xinclude_loader)
>>> print serialize(document) # C2
This document has been accessed
324387 times.
Textual inclusion after sibling element (based on modified XInclude C.2)
>>> document = xinclude_loader("C2b.xml")
>>> ElementInclude.include(document, xinclude_loader)
>>> print(serialize(document)) # C2b
This document has been accessed
324387 times.
Textual inclusion of XML example (XInclude C.3)
>>> document = xinclude_loader("C3.xml")
>>> ElementInclude.include(document, xinclude_loader)
>>> print serialize(document) # C3
The following is the source of the "data.xml" resource:
<?xml version='1.0'?>
<data>
<item><![CDATA[Brooks & Shields]]></item>
</data>
Fallback example (XInclude C.5)
Note! Fallback support is not yet implemented
>>> document = xinclude_loader("C5.xml")
>>> ElementInclude.include(document, xinclude_loader)
Traceback (most recent call last):
IOError: resource not found
>>> # print serialize(document) # C5
"""
def xinclude_default():
"""
>>> from xml.etree import ElementInclude
>>> document = xinclude_loader("default.xml")
>>> ElementInclude.include(document)
>>> print serialize(document) # default
Example.
text
text tail
"""
#
# badly formatted xi:include tags
XINCLUDE_BAD = {}
XINCLUDE_BAD["B1.xml"] = """\
120 Mz is adequate for an average home user.
"""
XINCLUDE_BAD["B2.xml"] = """\
"""
def xinclude_failures():
r"""
Test failure to locate included XML file.
>>> from xml.etree import ElementInclude
>>> def none_loader(href, parser, encoding=None):
... return None
>>> document = ET.XML(XINCLUDE["C1.xml"])
>>> ElementInclude.include(document, loader=none_loader)
Traceback (most recent call last):
FatalIncludeError: cannot load 'disclaimer.xml' as 'xml'
Test failure to locate included text file.
>>> document = ET.XML(XINCLUDE["C2.xml"])
>>> ElementInclude.include(document, loader=none_loader)
Traceback (most recent call last):
FatalIncludeError: cannot load 'count.txt' as 'text'
Test bad parse type.
>>> document = ET.XML(XINCLUDE_BAD["B1.xml"])
>>> ElementInclude.include(document, loader=none_loader)
Traceback (most recent call last):
FatalIncludeError: unknown parse type in xi:include tag ('BAD_TYPE')
Test xi:fallback outside xi:include.
>>> document = ET.XML(XINCLUDE_BAD["B2.xml"])
>>> ElementInclude.include(document, loader=none_loader)
Traceback (most recent call last):
FatalIncludeError: xi:fallback tag must be child of xi:include ('{http://www.w3.org/2001/XInclude}fallback')
"""
# --------------------------------------------------------------------
# reported bugs
def bug_xmltoolkit21():
"""
marshaller gives obscure errors for non-string values
>>> elem = ET.Element(123)
>>> serialize(elem) # tag
Traceback (most recent call last):
TypeError: cannot serialize 123 (type int)
>>> elem = ET.Element("elem")
>>> elem.text = 123
>>> serialize(elem) # text
Traceback (most recent call last):
TypeError: cannot serialize 123 (type int)
>>> elem = ET.Element("elem")
>>> elem.tail = 123
>>> serialize(elem) # tail
Traceback (most recent call last):
TypeError: cannot serialize 123 (type int)
>>> elem = ET.Element("elem")
>>> elem.set(123, "123")
>>> serialize(elem) # attribute key
Traceback (most recent call last):
TypeError: cannot serialize 123 (type int)
>>> elem = ET.Element("elem")
>>> elem.set("123", 123)
>>> serialize(elem) # attribute value
Traceback (most recent call last):
TypeError: cannot serialize 123 (type int)
"""
def bug_xmltoolkit25():
"""
typo in ElementTree.findtext
>>> elem = ET.XML(SAMPLE_XML)
>>> tree = ET.ElementTree(elem)
>>> tree.findtext("tag")
'text'
>>> tree.findtext("section/tag")
'subtext'
"""
def bug_xmltoolkit28():
"""
.//tag causes exceptions
>>> tree = ET.XML("
")
>>> summarize_list(tree.findall(".//thead"))
[]
>>> summarize_list(tree.findall(".//tbody"))
['tbody']
"""
def bug_xmltoolkitX1():
"""
dump() doesn't flush the output buffer
>>> tree = ET.XML("
")
>>> ET.dump(tree); sys.stdout.write("tail")
tail
"""
def bug_xmltoolkit39():
"""
non-ascii element and attribute names doesn't work
>>> tree = ET.XML(" ")
>>> ET.tostring(tree, "utf-8")
' '
>>> tree = ET.XML(" ")
>>> tree.attrib
{u'\\xe4ttr': u'v\\xe4lue'}
>>> ET.tostring(tree, "utf-8")
' '
>>> tree = ET.XML("text ")
>>> ET.tostring(tree, "utf-8")
'text '
>>> tree = ET.Element(u"t\u00e4g")
>>> ET.tostring(tree, "utf-8")
' '
>>> tree = ET.Element("tag")
>>> tree.set(u"\u00e4ttr", u"v\u00e4lue")
>>> ET.tostring(tree, "utf-8")
' '
"""
def bug_xmltoolkit54():
"""
problems handling internally defined entities
>>> e = ET.XML("]>&ldots; ")
>>> serialize(e)
'舰 '
"""
def bug_xmltoolkit55():
"""
make sure we're reporting the first error, not the last
>>> e = ET.XML("&ldots;&ndots;&rdots; ")
Traceback (most recent call last):
ParseError: undefined entity &ldots;: line 1, column 36
"""
class ExceptionFile:
def read(self, x):
raise IOError
def xmltoolkit60():
"""
Handle crash in stream source.
>>> tree = ET.parse(ExceptionFile())
Traceback (most recent call last):
IOError
"""
XMLTOOLKIT62_DOC = """
A new cultivar of Begonia plant named ‘BCT9801BEG’.
"""
def xmltoolkit62():
"""
Don't crash when using custom entities.
>>> xmltoolkit62()
u'A new cultivar of Begonia plant named \u2018BCT9801BEG\u2019.'
"""
ENTITIES = {u'rsquo': u'\u2019', u'lsquo': u'\u2018'}
parser = ET.XMLTreeBuilder()
parser.entity.update(ENTITIES)
parser.feed(XMLTOOLKIT62_DOC)
t = parser.close()
return t.find('.//paragraph').text
def xmltoolkit63():
"""
Check reference leak.
>>> xmltoolkit63()
>>> count = sys.getrefcount(None)
>>> for i in range(1000):
... xmltoolkit63()
>>> sys.getrefcount(None) - count
0
"""
tree = ET.TreeBuilder()
tree.start("tag", {})
tree.data("text")
tree.end("tag")
# --------------------------------------------------------------------
def bug_200708_newline():
r"""
Preserve newlines in attributes.
>>> e = ET.Element('SomeTag', text="def _f():\n return 3\n")
>>> ET.tostring(e)
' '
>>> ET.XML(ET.tostring(e)).get("text")
'def _f():\n return 3\n'
>>> ET.tostring(ET.XML(ET.tostring(e)))
' '
"""
def bug_200708_close():
"""
Test default builder.
>>> parser = ET.XMLParser() # default
>>> parser.feed("some text ")
>>> summarize(parser.close())
'element'
Test custom builder.
>>> class EchoTarget:
... def close(self):
... return ET.Element("element") # simulate root
>>> parser = ET.XMLParser(EchoTarget())
>>> parser.feed("some text ")
>>> summarize(parser.close())
'element'
"""
def bug_200709_default_namespace():
"""
>>> e = ET.Element("{default}elem")
>>> s = ET.SubElement(e, "{default}elem")
>>> serialize(e, default_namespace="default") # 1
' '
>>> e = ET.Element("{default}elem")
>>> s = ET.SubElement(e, "{default}elem")
>>> s = ET.SubElement(e, "{not-default}elem")
>>> serialize(e, default_namespace="default") # 2
' '
>>> e = ET.Element("{default}elem")
>>> s = ET.SubElement(e, "{default}elem")
>>> s = ET.SubElement(e, "elem") # unprefixed name
>>> serialize(e, default_namespace="default") # 3
Traceback (most recent call last):
ValueError: cannot use non-qualified names with default_namespace option
"""
def bug_200709_register_namespace():
"""
>>> ET.tostring(ET.Element("{http://namespace.invalid/does/not/exist/}title"))
' '
>>> ET.register_namespace("foo", "http://namespace.invalid/does/not/exist/")
>>> ET.tostring(ET.Element("{http://namespace.invalid/does/not/exist/}title"))
' '
And the Dublin Core namespace is in the default list:
>>> ET.tostring(ET.Element("{http://purl.org/dc/elements/1.1/}title"))
' '
"""
def bug_200709_element_comment():
"""
Not sure if this can be fixed, really (since the serializer needs
ET.Comment, not cET.comment).
>>> a = ET.Element('a')
>>> a.append(ET.Comment('foo'))
>>> a[0].tag == ET.Comment
True
>>> a = ET.Element('a')
>>> a.append(ET.PI('foo'))
>>> a[0].tag == ET.PI
True
"""
def bug_200709_element_insert():
"""
>>> a = ET.Element('a')
>>> b = ET.SubElement(a, 'b')
>>> c = ET.SubElement(a, 'c')
>>> d = ET.Element('d')
>>> a.insert(0, d)
>>> summarize_list(a)
['d', 'b', 'c']
>>> a.insert(-1, d)
>>> summarize_list(a)
['d', 'b', 'd', 'c']
"""
def bug_200709_iter_comment():
"""
>>> a = ET.Element('a')
>>> b = ET.SubElement(a, 'b')
>>> comment_b = ET.Comment("TEST-b")
>>> b.append(comment_b)
>>> summarize_list(a.iter(ET.Comment))
['']
"""
# --------------------------------------------------------------------
# reported on bugs.python.org
def bug_1534630():
"""
>>> bob = ET.TreeBuilder()
>>> e = bob.data("data")
>>> e = bob.start("tag", {})
>>> e = bob.end("tag")
>>> e = bob.close()
>>> serialize(e)
' '
"""
def check_issue6233():
"""
>>> e = ET.XML("t\\xc3\\xa3g")
>>> ET.tostring(e, 'ascii')
"\\ntãg"
>>> e = ET.XML("t\\xe3g")
>>> ET.tostring(e, 'ascii')
"\\ntãg"
"""
def check_issue3151():
"""
>>> e = ET.XML(' ')
>>> e.tag
'{${stuff}}localname'
>>> t = ET.ElementTree(e)
>>> ET.tostring(e)
' '
"""
def check_issue6565():
"""
>>> elem = ET.XML(" ")
>>> summarize_list(elem)
['tag']
>>> newelem = ET.XML(SAMPLE_XML)
>>> elem[:] = newelem[:]
>>> summarize_list(elem)
['tag', 'tag', 'section']
"""
def check_html_empty_elems_serialization(self):
# issue 15970
# from http://www.w3.org/TR/html401/index/elements.html
"""
>>> empty_elems = ['AREA', 'BASE', 'BASEFONT', 'BR', 'COL', 'FRAME', 'HR',
... 'IMG', 'INPUT', 'ISINDEX', 'LINK', 'META', 'PARAM']
>>> elems = ''.join('<%s />' % elem for elem in empty_elems)
>>> serialize(ET.XML('%s' % elems), method='html')
'
'
>>> serialize(ET.XML('%s' % elems.lower()), method='html')
'
'
>>> elems = ''.join('<%s>%s>' % (elem, elem) for elem in empty_elems)
>>> serialize(ET.XML('%s' % elems), method='html')
'
'
>>> serialize(ET.XML('%s' % elems.lower()), method='html')
'
'
"""
# --------------------------------------------------------------------
class CleanContext(object):
"""Provide default namespace mapping and path cache."""
checkwarnings = None
def __init__(self, quiet=False):
if sys.flags.optimize >= 2:
# under -OO, doctests cannot be run and therefore not all warnings
# will be emitted
quiet = True
deprecations = (
# Search behaviour is broken if search path starts with "/".
("This search is broken in 1.3 and earlier, and will be fixed "
"in a future version. If you rely on the current behaviour, "
"change it to '.+'", FutureWarning),
# Element.getchildren() and Element.getiterator() are deprecated.
("This method will be removed in future versions. "
"Use .+ instead.", DeprecationWarning),
("This method will be removed in future versions. "
"Use .+ instead.", PendingDeprecationWarning),
# XMLParser.doctype() is deprecated.
("This method of XMLParser is deprecated. Define doctype.. "
"method on the TreeBuilder target.", DeprecationWarning))
self.checkwarnings = test_support.check_warnings(*deprecations,
quiet=quiet)
def __enter__(self):
from xml.etree import ElementTree
self._nsmap = ElementTree._namespace_map
self._path_cache = ElementTree.ElementPath._cache
# Copy the default namespace mapping
ElementTree._namespace_map = self._nsmap.copy()
# Copy the path cache (should be empty)
ElementTree.ElementPath._cache = self._path_cache.copy()
self.checkwarnings.__enter__()
def __exit__(self, *args):
from xml.etree import ElementTree
# Restore mapping and path cache
ElementTree._namespace_map = self._nsmap
ElementTree.ElementPath._cache = self._path_cache
self.checkwarnings.__exit__(*args)
def test_main(module_name='xml.etree.ElementTree'):
from test import test_xml_etree
use_py_module = (module_name == 'xml.etree.ElementTree')
# The same doctests are used for both the Python and the C implementations
assert test_xml_etree.ET.__name__ == module_name
# XXX the C module should give the same warnings as the Python module
with CleanContext(quiet=not use_py_module):
test_support.run_doctest(test_xml_etree, verbosity=True)
# The module should not be changed by the tests
assert test_xml_etree.ET.__name__ == module_name
if __name__ == '__main__':
test_main()