Files
python-markdownify/tests/test_escaping.py
Joseph Myers 46af45bb3c Escape all characters with Markdown significance (#118)
* Escape all characters with Markdown significance

There are many punctuation characters that sometimes have significance
in Markdown; more systematically escape them all (based on a new
escape_misc configuration option).

A limited attempt is made to limit the escaping of '.' and ')' to the
context where they might have Markdown significance (after a number,
where they can indicate an ordered list item); no such attempt is made
for the other characters (and even that limiting of '.' and ')' may
not be entirely safe in all cases, as it's possible the HTML could
have the number outside the block being escaped in one go,
e.g. `<span>1</span>.`.

---------

Co-authored-by: AlexVonB <AlexVonB@users.noreply.github.com>
2024-04-04 21:42:58 +02:00

48 lines
1.4 KiB
Python

from markdownify import markdownify as md
def test_asterisks():
assert md('*hey*dude*') == r'\*hey\*dude\*'
assert md('*hey*dude*', escape_asterisks=False) == r'*hey*dude*'
def test_underscore():
assert md('_hey_dude_') == r'\_hey\_dude\_'
assert md('_hey_dude_', escape_underscores=False) == r'_hey_dude_'
def test_xml_entities():
assert md('&amp;') == r'\&'
def test_named_entities():
assert md('&raquo;') == u'\xbb'
def test_hexadecimal_entities():
# This looks to be a bug in BeautifulSoup (fixed in bs4) that we have to work around.
assert md('&#x27;') == '\x27'
def test_single_escaping_entities():
assert md('&amp;amp;') == r'\&amp;'
def text_misc():
assert md('\\*') == r'\\\*'
assert md('<foo>') == r'\<foo\>'
assert md('# foo') == r'\# foo'
assert md('> foo') == r'\> foo'
assert md('~~foo~~') == r'\~\~foo\~\~'
assert md('foo\n===\n') == 'foo\n\\=\\=\\=\n'
assert md('---\n') == '\\-\\-\\-\n'
assert md('+ x\n+ y\n') == '\\+ x\n\\+ y\n'
assert md('`x`') == r'\`x\`'
assert md('[text](link)') == r'\[text](link)'
assert md('1. x') == r'1\. x'
assert md('not a number. x') == r'not a number. x'
assert md('1) x') == r'1\) x'
assert md('not a number) x') == r'not a number) x'
assert md('|not table|') == r'\|not table\|'
assert md(r'\ <foo> &amp;amp; | ` `', escape_misc=False) == r'\ <foo> &amp; | ` `'