# All downloads are free. Search and download functionality uses the official Maven repository.
#
# test.test_parsers.test_rst.test_inline_markup.py (Maven / Gradle / Ivy)
#
# There is a newer version: 2.4
# Show newest version
#! /usr/bin/env python
# -*- coding: utf8 -*-

# $Id: test_inline_markup.py 7243 2011-12-05 19:35:32Z milde $
# Author: David Goodger 
# Copyright: This module has been placed in the public domain.

"""
Tests for inline markup in docutils/parsers/rst/states.py.
Interpreted text tests are in a separate module, test_interpreted.py.
"""

from __init__ import DocutilsTestSupport

def suite():
    """Return a ParserTestSuite populated from the module-level `totest` table."""
    parser_suite = DocutilsTestSupport.ParserTestSuite()
    parser_suite.generateTests(totest)
    return parser_suite

# Maps a test-group name to a list of [input, expected output] string pairs;
# suite() passes the whole mapping to ParserTestSuite.generateTests().
# NOTE(review): many expected-output strings below consist largely of
# whitespace-only lines -- this looks like markup that was stripped when the
# file was extracted; confirm against the upstream docutils test module.
totest = {}

totest['emphasis'] = [
["""\
*emphasis*
""",
"""\

    
        
            emphasis
"""],
[u"""\
l'*emphasis* with the *emphasis*' apostrophe.
l\u2019*emphasis* with the *emphasis*\u2019 apostrophe.
""",
u"""\

    
        l\'
        
            emphasis
         with the \n\
        
            emphasis
        \' apostrophe.
        l\u2019
        
            emphasis
         with the \n\
        
            emphasis
        \u2019 apostrophe.
"""],
["""\
*emphasized sentence
across lines*
""",
"""\

    
        
            emphasized sentence
            across lines
"""],
["""\
*emphasis without closing asterisk
""",
"""\

    
        
            *
        emphasis without closing asterisk
    
        
            Inline emphasis start-string without end-string.
"""],
[r"""some punctuation is allowed around inline markup, e.g.
/*emphasis*/, -*emphasis*-, and :*emphasis*: (delimiters),
(*emphasis*), [*emphasis*], <*emphasis*>, {*emphasis*} (open/close pairs)

but not
)*emphasis*(, ]*emphasis*[, >*emphasis*>, }*emphasis*{ (close/open pairs)
(*), [*], '*' or '"*"' ("quoted" start-string),
x*2* or 2*x* (alphanumeric char before),
\*args or * (escaped, whitespace behind start-string)
or *the\* *stars\* *inside* (escaped, whitespace before end-string).

However, '*args' will trigger a warning and may be problematic.

what about *this**?
""",
"""\

    
        some punctuation is allowed around inline markup, e.g.
        /
        
            emphasis
        /, -
        
            emphasis
        -, and :
        
            emphasis
        : (delimiters),
        (
        
            emphasis
        ), [
        
            emphasis
        ], <
        
            emphasis
        >, {
        
            emphasis
        } (open/close pairs)
    
        but not
        )*emphasis*(, ]*emphasis*[, >*emphasis*>, }*emphasis*{ (close/open pairs)
        (*), [*], '*' or '"*"' ("quoted" start-string),
        x*2* or 2*x* (alphanumeric char before),
        *args or * (escaped, whitespace behind start-string)
        or \n\
        
            the* *stars* *inside
         (escaped, whitespace before end-string).
    
        However, '
        
            *
        args' will trigger a warning and may be problematic.
    
        
            Inline emphasis start-string without end-string.
    
        what about \n\
        
            this*
        ?
"""],
[u"""\
Quotes around inline markup:

'*emphasis*' "*emphasis*" Straight,
‘*emphasis*’ “*emphasis*” English, ...,
« *emphasis* » ‹ *emphasis* › « *emphasis* » ‹ *emphasis* ›
« *emphasis* » ‹ *emphasis* › French,
„*emphasis*“ ‚*emphasis*‘ »*emphasis*« ›*emphasis*‹ German, Czech, ...,
„*emphasis*” «*emphasis*» Romanian,
“*emphasis*„ ‘*emphasis*‚ Greek,
「*emphasis*」 『*emphasis*』traditional Chinese,
”*emphasis*” ’*emphasis*’ »*emphasis*» ›*emphasis*› Swedish, Finnish,
„*emphasis*” ‚*emphasis*’ Polish,
„*emphasis*” »*emphasis*« ’*emphasis*’ Hungarian,
""",
u"""\

    
        Quotes around inline markup:
    
        \'
        
            emphasis
        \' "
        
            emphasis
        " Straight,
        \u2018
        
            emphasis
        \u2019 \u201c
        
            emphasis
        \u201d English, ...,
        \xab\u202f
        
            emphasis
        \u202f\xbb \u2039\u202f
        
            emphasis
        \u202f\u203a \xab\xa0
        
            emphasis
        \xa0\xbb \u2039\xa0
        
            emphasis
        \xa0\u203a
        \xab\u2005
        
            emphasis
        \u2005\xbb \u2039\u2005
        
            emphasis
        \u2005\u203a French,
        \u201e
        
            emphasis
        \u201c \u201a
        
            emphasis
        \u2018 \xbb
        
            emphasis
        \xab \u203a
        
            emphasis
        \u2039 German, Czech, ...,
        \u201e
        
            emphasis
        \u201d \xab
        
            emphasis
        \xbb Romanian,
        \u201c
        
            emphasis
        \u201e \u2018
        
            emphasis
        \u201a Greek,
        \u300c
        
            emphasis
        \u300d \u300e
        
            emphasis
        \u300ftraditional Chinese,
        \u201d
        
            emphasis
        \u201d \u2019
        
            emphasis
        \u2019 \xbb
        
            emphasis
        \xbb \u203a
        
            emphasis
        \u203a Swedish, Finnish,
        \u201e
        
            emphasis
        \u201d \u201a
        
            emphasis
        \u2019 Polish,
        \u201e
        
            emphasis
        \u201d \xbb
        
            emphasis
        \xab \u2019
        
            emphasis
        \u2019 Hungarian,
"""],
[r"""
Emphasized asterisk: *\**

Emphasized double asterisk: *\***
""",
"""\

    
        Emphasized asterisk: \n\
        
            *
    
        Emphasized double asterisk: \n\
        
            **
"""],
]

totest['strong'] = [
["""\
**strong**
""",
"""\

    
        
            strong
"""],
[u"""\
l'**strong** and l\u2019**strong** with apostrophe
""",
u"""\

    
        l'
        
            strong
         and l\u2019
        
            strong
         with apostrophe
"""],
[u"""\
quoted '**strong**', quoted "**strong**",
quoted \u2018**strong**\u2019, quoted \u201c**strong**\u201d,
quoted \xab**strong**\xbb
""",
u"""\

    
        quoted '
        
            strong
        ', quoted "
        
            strong
        ",
        quoted \u2018
        
            strong
        \u2019, quoted \u201c
        
            strong
        \u201d,
        quoted \xab
        
            strong
        \xbb
"""],
[r"""
(**strong**) but not (**) or '(** ' or x**2 or \**kwargs or **

(however, '**kwargs' will trigger a warning and may be problematic)
""",
"""\

    
        (
        
            strong
        ) but not (**) or '(** ' or x**2 or **kwargs or **
    
        (however, '
        
            **
        kwargs' will trigger a warning and may be problematic)
    
        
            Inline strong start-string without end-string.
"""],
["""\
Strong asterisk: *****

Strong double asterisk: ******
""",
"""\

    
        Strong asterisk: \n\
        
            *
    
        Strong double asterisk: \n\
        
            **
"""],
["""\
**strong without closing asterisks
""",
"""\

    
        
            **
        strong without closing asterisks
    
        
            Inline strong start-string without end-string.
"""],
]

totest['literal'] = [
["""\
``literal``
""",
"""\

    
        
            literal
"""],
[r"""
``\literal``
""",
"""\

    
        
            \\literal
"""],
[r"""
``lite\ral``
""",
"""\

    
        
            lite\\ral
"""],
[r"""
``literal\``
""",
"""\

    
        
            literal\\
"""],
[u"""\
l'``literal`` and l\u2019``literal`` with apostrophe
""",
u"""\

    
        l'
        
            literal
         and l\u2019
        
            literal
         with apostrophe
"""],
[u"""\
quoted '``literal``', quoted "``literal``",
quoted \u2018``literal``\u2019, quoted \u201c``literal``\u201d,
quoted \xab``literal``\xbb
""",
u"""\

    
        quoted '
        
            literal
        ', quoted "
        
            literal
        ",
        quoted \u2018
        
            literal
        \u2019, quoted \u201c
        
            literal
        \u201d,
        quoted \xab
        
            literal
        \xbb
"""],
[u"""\
``'literal'`` with quotes, ``"literal"`` with quotes,
``\u2018literal\u2019`` with quotes, ``\u201cliteral\u201d`` with quotes,
``\xabliteral\xbb`` with quotes
""",
u"""\

    
        
            'literal'
         with quotes, \n\
        
            "literal"
         with quotes,
        
            \u2018literal\u2019
         with quotes, \n\
        
            \u201cliteral\u201d
         with quotes,
        
            \xabliteral\xbb
         with quotes
"""],
[r"""
``literal ``TeX quotes'' & \backslash`` but not "``" or ``

(however, ``standalone TeX quotes'' will trigger a warning
and may be problematic)
""",
"""\

    
        
            literal ``TeX quotes'' & \\backslash
         but not "``" or ``
    
        (however, \n\
        
            ``
        standalone TeX quotes'' will trigger a warning
        and may be problematic)
    
        
            Inline literal start-string without end-string.
"""],
["""\
Find the ```interpreted text``` in this paragraph!
""",
"""\

    
        Find the \n\
        
            `interpreted text`
         in this paragraph!
"""],
["""\
``literal without closing backquotes
""",
"""\

    
        
            ``
        literal without closing backquotes
    
        
            Inline literal start-string without end-string.
"""],
[r"""
Python ``list``\s use square bracket syntax.
""",
"""\

    
        Python \n\
        
            list
        s use square bracket syntax.
"""],
]

totest['references'] = [
["""\
ref_
""",
"""\

    
        
            ref
"""],
[u"""\
l'ref_ and l\u2019ref_ with apostrophe
""",
u"""\

    
        l'
        
            ref
         and l\u2019
        
            ref
         with apostrophe
"""],
[u"""\
quoted 'ref_', quoted "ref_",
quoted \u2018ref_\u2019, quoted \u201cref_\u201d,
quoted \xabref_\xbb,
but not 'ref ref'_, "ref ref"_, \u2018ref ref\u2019_,
\u201cref ref\u201d_, or \xabref ref\xbb_
""",
u"""\

    
        quoted '
        
            ref
        ', quoted "
        
            ref
        ",
        quoted \u2018
        
            ref
        \u2019, quoted \u201c
        
            ref
        \u201d,
        quoted \xab
        
            ref
        \xbb,
        but not 'ref ref'_, "ref ref"_, \u2018ref ref\u2019_,
        \u201cref ref\u201d_, or \xabref ref\xbb_
"""],
["""\
ref__
""",
"""\

    
        
            ref
"""],
[u"""\
l'ref__ and l\u2019ref__ with apostrophe
""",
u"""\

    
        l'
        
            ref
         and l\u2019
        
            ref
         with apostrophe
"""],
[u"""\
quoted 'ref__', quoted "ref__",
quoted \u2018ref__\u2019, quoted \u201cref__\u201d,
quoted \xabref__\xbb,
but not 'ref ref'__, "ref ref"__, \u2018ref ref\u2019__,
\u201cref ref\u201d__, or \xabref ref\xbb__
""",
u"""\

    
        quoted '
        
            ref
        ', quoted "
        
            ref
        ",
        quoted \u2018
        
            ref
        \u2019, quoted \u201c
        
            ref
        \u201d,
        quoted \xab
        
            ref
        \xbb,
        but not 'ref ref'__, "ref ref"__, \u2018ref ref\u2019__,
        \u201cref ref\u201d__, or \xabref ref\xbb__
"""],
["""\
ref_, r_, r_e-f_, -ref_, and anonymousref__,
but not _ref_ or __attr__ or object.__attr__
""",
"""\

    
        
            ref
        , \n\
        
            r
        , \n\
        
            r_e-f
        , -
        
            ref
        , and \n\
        
            anonymousref
        ,
        but not _ref_ or __attr__ or object.__attr__
"""],
]

totest['phrase_references'] = [
["""\
`phrase reference`_
""",
"""\

    
        
            phrase reference
"""],
[u"""\
l'`phrase reference`_ and l\u2019`phrase reference`_ with apostrophe
""",
u"""\

    
        l'
        
            phrase reference
         and l\u2019
        
            phrase reference
         with apostrophe
"""],
[u"""\
quoted '`phrase reference`_', quoted "`phrase reference`_",
quoted \u2018`phrase reference`_\u2019,
quoted \u201c`phrase reference`_\u201d,
quoted \xab`phrase reference`_\xbb
""",
u"""\

    
        quoted '
        
            phrase reference
        ', quoted "
        
            phrase reference
        ",
        quoted \u2018
        
            phrase reference
        \u2019,
        quoted \u201c
        
            phrase reference
        \u201d,
        quoted \xab
        
            phrase reference
        \xbb
"""],
[u"""\
`'phrase reference'`_ with quotes, `"phrase reference"`_ with quotes,
`\u2018phrase reference\u2019`_ with quotes,
`\u201cphrase reference\u201d`_ with quotes,
`\xabphrase reference\xbb`_ with quotes
""",
u"""\

    
        
            'phrase reference'
         with quotes, \n\
        
            "phrase reference"
         with quotes,
        
            \u2018phrase reference\u2019
         with quotes,
        
            \u201cphrase reference\u201d
         with quotes,
        
            \xabphrase reference\xbb
         with quotes
"""],
["""\
`anonymous reference`__
""",
"""\

    
        
            anonymous reference
"""],
[u"""\
l'`anonymous reference`__ and l\u2019`anonymous reference`__ with apostrophe
""",
u"""\

    
        l'
        
            anonymous reference
         and l\u2019
        
            anonymous reference
         with apostrophe
"""],
[u"""\
quoted '`anonymous reference`__', quoted "`anonymous reference`__",
quoted \u2018`anonymous reference`__\u2019,
quoted \u201c`anonymous reference`__\u201d,
quoted \xab`anonymous reference`__\xbb
""",
u"""\

    
        quoted '
        
            anonymous reference
        ', quoted "
        
            anonymous reference
        ",
        quoted \u2018
        
            anonymous reference
        \u2019,
        quoted \u201c
        
            anonymous reference
        \u201d,
        quoted \xab
        
            anonymous reference
        \xbb
"""],
[u"""\
`'anonymous reference'`__ with quotes, `"anonymous reference"`__ with quotes,
`\u2018anonymous reference\u2019`__ with quotes,
`\u201canonymous reference\u201d`__ with quotes,
`\xabanonymous reference\xbb`__ with quotes
""",
u"""\

    
        
            'anonymous reference'
         with quotes, \n\
        
            "anonymous reference"
         with quotes,
        
            \u2018anonymous reference\u2019
         with quotes,
        
            \u201canonymous reference\u201d
         with quotes,
        
            \xabanonymous reference\xbb
         with quotes
"""],
["""\
`phrase reference
across lines`_
""",
"""\

    
        
            phrase reference
            across lines
"""],
["""\
`phrase\`_ reference`_
""",
"""\

    
        
            phrase`_ reference
"""],
["""\
Invalid phrase reference:

:role:`phrase reference`_
""",
"""\

    
        Invalid phrase reference:
    
        
            :role:`phrase reference`_
    
        
            Mismatch: both interpreted text role prefix and reference suffix.
"""],
["""\
Invalid phrase reference:

`phrase reference`:role:_
""",
"""\

    
        Invalid phrase reference:
    
        
            `phrase reference`:role:_
    
        
            Mismatch: both interpreted text role suffix and reference suffix.
"""],
["""\
`phrase reference_ without closing backquote
""",
"""\

    
        
            `
        phrase \n\
        
            reference
         without closing backquote
    
        
            Inline interpreted text or phrase reference start-string without end-string.
"""],
["""\
`anonymous phrase reference__ without closing backquote
""",
"""\

    
        
            `
        anonymous phrase \n\
        
            reference
         without closing backquote
    
        
            Inline interpreted text or phrase reference start-string without end-string.
"""],
]

totest['embedded_URIs'] = [
["""\
`phrase reference `_
""",
"""\

    
        
            phrase reference
        
"""],
["""\
`anonymous reference `__
""",
"""\

    
        
            anonymous reference
"""],
["""\
`embedded URI on next line
`__
""",
"""\

    
        
            embedded URI on next line
"""],
["""\
`embedded URI across lines `__
""",
"""\

    
        
            embedded URI across lines
"""],
["""\
`embedded URI with whitespace `__
""",
"""\

    
        
            embedded URI with whitespace
"""],
["""\
`embedded email address `__

`embedded email address broken across lines `__
""",
"""\

    
        
            embedded email address
    
        
            embedded email address broken across lines
"""],
[r"""
`embedded URI with too much whitespace < http://example.com/
long/path /and  /whitespace >`__

`embedded URI with too much whitespace at end `__

`embedded URI with no preceding whitespace`__

`escaped URI \`__

See `HTML Anchors: \`_.
""",
"""\

    
        
            embedded URI with too much whitespace < http://example.com/
            long/path /and  /whitespace >
    
        
            embedded URI with too much whitespace at end 
    
        
            embedded URI with no preceding whitespace
    
        
            escaped URI 
    
        See \n\
        
            HTML Anchors: 
        .
"""],
["""\
Relative URIs' reference text can be omitted:

``_

``__
""",
"""\

    
        Relative URIs' reference text can be omitted:
    
        
            reference
        
    
        
            anonymous
"""],
]

totest['inline_targets'] = [
["""\
_`target`

Here is _`another target` in some text. And _`yet
another target`, spanning lines.

_`Here is  a    TaRgeT` with case and spacial difficulties.
""",
"""\

    
        
            target
    
        Here is \n\
        
            another target
         in some text. And \n\
        
            yet
            another target
        , spanning lines.
    
        
            Here is  a    TaRgeT
         with case and spacial difficulties.
"""],
[u"""\
l'_`target1` and l\u2019_`target2` with apostrophe
""",
u"""\

    
        l'
        
            target1
         and l\u2019
        
            target2
         with apostrophe
"""],
[u"""\
quoted '_`target1`', quoted "_`target2`",
quoted \u2018_`target3`\u2019, quoted \u201c_`target4`\u201d,
quoted \xab_`target5`\xbb
""",
u"""\

    
        quoted '
        
            target1
        ', quoted "
        
            target2
        ",
        quoted \u2018
        
            target3
        \u2019, quoted \u201c
        
            target4
        \u201d,
        quoted \xab
        
            target5
        \xbb
"""],
[u"""\
_`'target1'` with quotes, _`"target2"` with quotes,
_`\u2018target3\u2019` with quotes, _`\u201ctarget4\u201d` with quotes,
_`\xabtarget5\xbb` with quotes
""",
u"""\

    
        
            'target1'
         with quotes, \n\
        
            "target2"
         with quotes,
        
            \u2018target3\u2019
         with quotes, \n\
        
            \u201ctarget4\u201d
         with quotes,
        
            \xabtarget5\xbb
         with quotes
"""],
["""\
But this isn't a _target; targets require backquotes.

And _`this`_ is just plain confusing.
""",
"""\

    
        But this isn't a _target; targets require backquotes.
    
        And \n\
        
            _`
        this`_ is just plain confusing.
    
        
            Inline target start-string without end-string.
"""],
["""\
_`inline target without closing backquote
""",
"""\

    
        
            _`
        inline target without closing backquote
    
        
            Inline target start-string without end-string.
"""],
]

# Footnote-reference cases: the inputs [1]_, [#]_, [#label]_ and [*]_, plus a
# run of adjacent references whose expected output repeats the input text
# unchanged (the input itself states that adjacent refs "are not possible").
totest['footnote_reference'] = [
["""\
[1]_
""",
"""\

    
        
            1
"""],
["""\
[#]_
""",
"""\

    
        
"""],
["""\
[#label]_
""",
"""\

    
        
"""],
["""\
[*]_
""",
"""\

    
        
"""],
["""\
Adjacent footnote refs are not possible: [*]_[#label]_ [#]_[2]_ [1]_[*]_
""",
"""\

    
        Adjacent footnote refs are not possible: [*]_[#label]_ [#]_[2]_ [1]_[*]_
"""],
]

# Citation-reference cases: a single [citation]_, labels containing "-", "."
# and digits, a label with a space ([CIT 1]_) that the expected output keeps
# as plain text, and adjacent references that are likewise kept verbatim.
totest['citation_reference'] = [
["""\
[citation]_
""",
"""\

    
        
            citation
"""],
["""\
[citation]_ and [cit-ation]_ and [cit.ation]_ and [CIT1]_ but not [CIT 1]_
""",
"""\

    
        
            citation
         and \n\
        
            cit-ation
         and \n\
        
            cit.ation
         and \n\
        
            CIT1
         but not [CIT 1]_
"""],
["""\
Adjacent citation refs are not possible: [citation]_[CIT1]_
""",
"""\

    
        Adjacent citation refs are not possible: [citation]_[CIT1]_
"""],
]

# Substitution-reference cases: |subref| alone and with "_" / "__" suffixes,
# references containing spaces or spanning two lines, an unterminated
# reference (expected output carries the "start-string without end-string"
# message), and bare "|" runs that the expected output keeps verbatim.
totest['substitution_references'] = [
["""\
|subref|
""",
"""\

    
        
            subref
"""],
["""\
|subref|_ and |subref|__
""",
"""\

    
        
            
                subref
         and \n\
        
            
                subref
"""],
["""\
|substitution reference|
""",
"""\

    
        
            substitution reference
"""],
["""\
|substitution
reference|
""",
"""\

    
        
            substitution
            reference
"""],
["""\
|substitution reference without closing verbar
""",
"""\

    
        
            |
        substitution reference without closing verbar
    
        
            Inline substitution_reference start-string without end-string.
"""],
["""\
first | then || and finally |||
""",
"""\

    
        first | then || and finally |||
"""],
]

totest['standalone_hyperlink'] = [
["""\
http://www.standalone.hyperlink.com

http:/one-slash-only.absolute.path

[http://example.com]

(http://example.com)



http://[1080:0:0:0:8:800:200C:417A]/IPv6address.html

http://[3ffe:2a00:100:7031::1] (the final "]" is ambiguous in text)

http://[3ffe:2a00:100:7031::1]/

mailto:[email protected]

news:comp.lang.python

An email address in a sentence: [email protected].

ftp://ends.with.a.period.

(a.question.mark@end?)
""",
"""\

    
        
            http://www.standalone.hyperlink.com
    
        
            http:/one-slash-only.absolute.path
    
        [
        
            http://example.com
        ]
    
        (
        
            http://example.com
        )
    
        <
        
            http://example.com
        >
    
        
            http://[1080:0:0:0:8:800:200C:417A]/IPv6address.html
    
        
            http://[3ffe:2a00:100:7031::1
        ] (the final "]" is ambiguous in text)
    
        
            http://[3ffe:2a00:100:7031::1]/
    
        
            mailto:[email protected]
    
        
            news:comp.lang.python
    
        An email address in a sentence: \n\
        
            [email protected]
        .
    
        
            ftp://ends.with.a.period
        .
    
        (
        
            a.question.mark@end
        ?)
"""],
[r"""
Valid URLs with escaped markup characters:

http://example.com/\*content\*/whatever

http://example.com/\*content*/whatever
""",
"""\

    
        Valid URLs with escaped markup characters:
    
        
            http://example.com/*content*/whatever
    
        
            http://example.com/*content*/whatever
"""],
["""\
Valid URLs may end with punctuation inside "<>":


""",
"""\

    
        Valid URLs may end with punctuation inside "<>":
    
        <
        
            http://example.org/ends-with-dot.
        >
"""],
["""\
Valid URLs with interesting endings:

http://example.org/ends-with-pluses++
""",
"""\

    
        Valid URLs with interesting endings:
    
        
            http://example.org/ends-with-pluses++
"""],
["""\
None of these are standalone hyperlinks (their "schemes"
are not recognized): signal:noise, a:b.
""",
"""\

    
        None of these are standalone hyperlinks (their "schemes"
        are not recognized): signal:noise, a:b.
"""],
["""\
Escaped email addresses are not recognized: test\@example.org
""",
"""\

    
        Escaped email addresses are not recognized: [email protected]
"""],
]

totest['markup recognition rules'] = [
["""\
__This__ should be left alone.
""",
"""\

    
        __This__ should be left alone.
"""],
[r"""
Character-level m\ *a*\ **r**\ ``k``\ `u`:title:\p
with backslash-escaped whitespace, including new\
lines.
""",
"""\

    
        Character-level m
        
            a
        
            r
        
            k
        
            u
        p
        with backslash-escaped whitespace, including newlines.
"""],
[u"""\
text-*separated*\u2010*by*\u2011*various*\u2012*dashes*\u2013*and*\u2014*hyphens*.
\u00bf*punctuation*? \u00a1*examples*!\u00a0*\u00a0no-break-space\u00a0*.
""",
u"""\

    
        text-
        
            separated
        \u2010
        
            by
        \u2011
        
            various
        \u2012
        
            dashes
        \u2013
        
            and
        \u2014
        
            hyphens
        .
        \xbf
        
            punctuation
        ? \xa1
        
            examples
        !\xa0
        
            \u00a0no-break-space\u00a0
        .
"""],
# Whitespace characters:
#  \u180e*MONGOLIAN VOWEL SEPARATOR*\u180e, is left out because it fails in Python 2.4
[u"""\
text separated by
*newline*
or *space* or one of
\xa0*NO-BREAK SPACE*\xa0,
\u1680*OGHAM SPACE MARK*\u1680,
\u2000*EN QUAD*\u2000,
\u2001*EM QUAD*\u2001,
\u2002*EN SPACE*\u2002,
\u2003*EM SPACE*\u2003,
\u2004*THREE-PER-EM SPACE*\u2004,
\u2005*FOUR-PER-EM SPACE*\u2005,
\u2006*SIX-PER-EM SPACE*\u2006,
\u2007*FIGURE SPACE*\u2007,
\u2008*PUNCTUATION SPACE*\u2008,
\u2009*THIN SPACE*\u2009,
\u200a*HAIR SPACE*\u200a,
\u202f*NARROW NO-BREAK SPACE*\u202f,
\u205f*MEDIUM MATHEMATICAL SPACE*\u205f,
\u3000*IDEOGRAPHIC SPACE*\u3000,
\u2028*LINE SEPARATOR*\u2028
""",
u"""\

    
        text separated by
        
            newline
        \n\
        or \n\
        
            space
         or one of
        \xa0
        
            NO-BREAK SPACE
        \xa0,
        \u1680
        
            OGHAM SPACE MARK
        \u1680,
        \u2000
        
            EN QUAD
        \u2000,
        \u2001
        
            EM QUAD
        \u2001,
        \u2002
        
            EN SPACE
        \u2002,
        \u2003
        
            EM SPACE
        \u2003,
        \u2004
        
            THREE-PER-EM SPACE
        \u2004,
        \u2005
        
            FOUR-PER-EM SPACE
        \u2005,
        \u2006
        
            SIX-PER-EM SPACE
        \u2006,
        \u2007
        
            FIGURE SPACE
        \u2007,
        \u2008
        
            PUNCTUATION SPACE
        \u2008,
        \u2009
        
            THIN SPACE
        \u2009,
        \u200a
        
            HAIR SPACE
        \u200a,
        \u202f
        
            NARROW NO-BREAK SPACE
        \u202f,
        \u205f
        
            MEDIUM MATHEMATICAL SPACE
        \u205f,
        \u3000
        
            IDEOGRAPHIC SPACE
        \u3000,
    
        
            LINE SEPARATOR
"""],
# « * » ‹ * › « * » ‹ * › « * » ‹ * › French,
[u"""\
"Quoted" markup start-string (matched openers & closers) -> no markup:

'*' "*" (*) <*> [*] {*}
⁅*⁆

Some international quoting styles:
‘*’ “*” English, ...,
„*“ ‚*‘ »*« ›*‹ German, Czech, ...,
„*” «*» Romanian,
“*„ ‘*‚ Greek,
「*」 『*』traditional Chinese,
”*” ’*’ »*» ›*› Swedish, Finnish,
„*” ‚*’ Polish,
„*” »*« ’*’ Hungarian,

But this is „*’ emphasized »*‹.
""",
u"""\

    
        "Quoted" markup start-string (matched openers & closers) -> no markup:
    
        '*' "*" (*) <*> [*] {*}
        ⁅*⁆
    
        Some international quoting styles:
        ‘*’ “*” English, ...,
        „*“ ‚*‘ »*« ›*‹ German, Czech, ...,
        „*” «*» Romanian,
        “*„ ‘*‚ Greek,
        「*」 『*』traditional Chinese,
        ”*” ’*’ »*» ›*› Swedish, Finnish,
        „*” ‚*’ Polish,
        „*” »*« ’*’ Hungarian,
    
        But this is „
        
            ’ emphasized »
        ‹.
"""],
]


# When executed directly as a script, hand control to unittest and name the
# module-level suite() function as the default test to run.
if __name__ == '__main__':
    import unittest
    unittest.main(defaultTest='suite')