Compare commits

..

6 Commits

Author SHA1 Message Date
AlexVonB
4c23c0655f use static version instead of dynamic git tag info 2024-07-14 22:34:30 +02:00
AlexVonB
e2ace9d633 test build in develop and pull requests 2024-07-14 22:10:01 +02:00
AlexVonB
a5615f7d80 Merge branch 'pyproject.toml' of https://github.com/KOLANICH-libs/markdownify.py into KOLANICH-libs-pyproject.toml 2024-07-14 21:53:09 +02:00
KOLANICH
67100595ae Migrated the metadata into PEP 621-compliant pyproject.toml, got rid of setup.cfg. 2022-11-10 15:29:25 +03:00
KOLANICH
deba8b5e54 Started populating version automatically from git tags using setuptools_scm. 2022-11-10 15:27:26 +03:00
KOLANICH
ca88e4e49d Move the metadata from setup.py into setup.cfg.
Added `pyproject.toml`.
Removed `setup.py` - it is no longer needed.
Got rid of tests erroneously finding their way into the wheel.
2022-11-10 15:25:39 +03:00
9 changed files with 117 additions and 231 deletions

View File

@@ -128,9 +128,9 @@ escape_underscores
Defaults to ``True``.
escape_misc
If set to ``True``, escape miscellaneous punctuation characters
If set to ``False``, do not escape miscellaneous punctuation characters
that sometimes have Markdown significance in text.
Defaults to ``False``.
Defaults to ``True``.
keep_inline_images_in
Images are converted to their alt-text when the images are located inside

View File

@@ -7,8 +7,7 @@ import six
convert_heading_re = re.compile(r'convert_h(\d+)')
line_beginning_re = re.compile(r'^', re.MULTILINE)
whitespace_re = re.compile(r'[\t ]+')
all_whitespace_re = re.compile(r'[\t \r\n]+')
newline_whitespace_re = re.compile(r'[\t \r\n]*[\r\n][\t \r\n]*')
all_whitespace_re = re.compile(r'[\s]+')
html_heading_re = re.compile(r'h[1-6]')
@@ -67,23 +66,6 @@ def _todict(obj):
return dict((k, getattr(obj, k)) for k in dir(obj) if not k.startswith('_'))
def should_remove_whitespace_inside(el):
"""Return whether to remove whitespace immediately inside a block-level element."""
if not el or not el.name:
return False
if html_heading_re.match(el.name) is not None:
return True
return el.name in ('p', 'blockquote',
'ol', 'ul', 'li',
'table', 'thead', 'tbody', 'tfoot',
'tr', 'td', 'th')
def should_remove_whitespace_outside(el):
"""Return whether to remove whitespace immediately outside a block-level element."""
return should_remove_whitespace_inside(el) or (el and el.name == 'pre')
class MarkdownConverter(object):
class DefaultOptions:
autolinks = True
@@ -94,7 +76,7 @@ class MarkdownConverter(object):
default_title = False
escape_asterisks = True
escape_underscores = True
escape_misc = False
escape_misc = True
heading_style = UNDERLINED
keep_inline_images_in = []
newline_style = SPACES
@@ -137,23 +119,27 @@ class MarkdownConverter(object):
if not children_only and (isHeading or isCell):
convert_children_as_inline = True
# Remove whitespace-only textnodes just before, after or
# inside block-level elements.
should_remove_inside = should_remove_whitespace_inside(node)
for el in node.children:
# Only extract (remove) whitespace-only text node if any of the
# conditions is true:
# - el is the first element in its parent (block-level)
# - el is the last element in its parent (block-level)
# - el is adjacent to a block-level node
can_extract = (should_remove_inside and (not el.previous_sibling
or not el.next_sibling)
or should_remove_whitespace_outside(el.previous_sibling)
or should_remove_whitespace_outside(el.next_sibling))
if (isinstance(el, NavigableString)
and six.text_type(el).strip() == ''
and can_extract):
el.extract()
# Remove whitespace-only textnodes in purely nested nodes
def is_nested_node(el):
return el and el.name in ['ol', 'ul', 'li',
'table', 'thead', 'tbody', 'tfoot',
'tr', 'td', 'th']
if is_nested_node(node):
for el in node.children:
# Only extract (remove) whitespace-only text node if any of the
# conditions is true:
# - el is the first element in its parent
# - el is the last element in its parent
# - el is adjacent to a nested node
can_extract = (not el.previous_sibling
or not el.next_sibling
or is_nested_node(el.previous_sibling)
or is_nested_node(el.next_sibling))
if (isinstance(el, NavigableString)
and six.text_type(el).strip() == ''
and can_extract):
el.extract()
# Convert the children first
for el in node.children:
@@ -162,13 +148,7 @@ class MarkdownConverter(object):
elif isinstance(el, NavigableString):
text += self.process_text(el)
else:
text_strip = text.rstrip('\n')
newlines_left = len(text) - len(text_strip)
next_text = self.process_tag(el, convert_children_as_inline)
next_text_strip = next_text.lstrip('\n')
newlines_right = len(next_text) - len(next_text_strip)
newlines = '\n' * max(newlines_left, newlines_right)
text = text_strip + newlines + next_text_strip
text += self.process_tag(el, convert_children_as_inline)
if not children_only:
convert_fn = getattr(self, 'convert_%s' % node.name, None)
@@ -182,26 +162,18 @@ class MarkdownConverter(object):
# normalize whitespace if we're not inside a preformatted element
if not el.find_parent('pre'):
if self.options['wrap']:
text = all_whitespace_re.sub(' ', text)
else:
text = newline_whitespace_re.sub('\n', text)
text = whitespace_re.sub(' ', text)
text = whitespace_re.sub(' ', text)
# escape special characters if we're not inside a preformatted or code element
if not el.find_parent(['pre', 'code', 'kbd', 'samp']):
text = self.escape(text)
# remove leading whitespace at the start or just after a
# block-level element; remove trailing whitespace at the end
# or just before a block-level element.
if (should_remove_whitespace_outside(el.previous_sibling)
or (should_remove_whitespace_inside(el.parent)
and not el.previous_sibling)):
text = text.lstrip()
if (should_remove_whitespace_outside(el.next_sibling)
or (should_remove_whitespace_inside(el.parent)
and not el.next_sibling)):
# remove trailing whitespace if any of the following conditions is true:
# - current text node is the last node in li
# - current text node is followed by an embedded list
if (el.parent.name == 'li'
and (not el.next_sibling
or el.next_sibling.name in ['ul', 'ol'])):
text = text.rstrip()
return text
@@ -236,32 +208,20 @@ class MarkdownConverter(object):
if not text:
return ''
if self.options['escape_misc']:
text = re.sub(r'([\\&<`[>~=+|])', r'\\\1', text)
# A sequence of one or more consecutive '-', preceded and
# followed by whitespace or start/end of fragment, might
# be confused with an underline of a header, or with a
# list marker.
text = re.sub(r'(\s|^)(-+(?:\s|$))', r'\1\\\2', text)
# A sequence of up to six consecutive '#', preceded and
# followed by whitespace or start/end of fragment, might
# be confused with an ATX heading.
text = re.sub(r'(\s|^)(#{1,6}(?:\s|$))', r'\1\\\2', text)
# '.' or ')' preceded by up to nine digits might be
# confused with a list item.
text = re.sub(r'((?:\s|^)[0-9]{1,9})([.)](?:\s|$))', r'\1\\\2',
text)
text = re.sub(r'([\\&<`[>~#=+|-])', r'\\\1', text)
text = re.sub(r'([0-9])([.)])', r'\1\\\2', text)
if self.options['escape_asterisks']:
text = text.replace('*', r'\*')
if self.options['escape_underscores']:
text = text.replace('_', r'\_')
return text
def indent(self, text, columns):
return line_beginning_re.sub(' ' * columns, text) if text else ''
def indent(self, text, level):
return line_beginning_re.sub('\t' * level, text) if text else ''
def underline(self, text, pad_char):
text = (text or '').rstrip()
return '\n\n%s\n%s\n\n' % (text, pad_char * len(text)) if text else ''
return '%s\n%s\n\n' % (text, pad_char * len(text)) if text else ''
def convert_a(self, el, text, convert_as_inline):
prefix, suffix, text = chomp(text)
@@ -286,7 +246,7 @@ class MarkdownConverter(object):
def convert_blockquote(self, el, text, convert_as_inline):
if convert_as_inline:
return ' ' + text.strip() + ' '
return text
return '\n' + (line_beginning_re.sub('> ', text.strip()) + '\n\n') if text else ''
@@ -320,11 +280,10 @@ class MarkdownConverter(object):
if style == UNDERLINED and n <= 2:
line = '=' if n == 1 else '-'
return self.underline(text, line)
text = all_whitespace_re.sub(' ', text)
hashes = '#' * n
if style == ATX_CLOSED:
return '\n%s %s %s\n\n' % (hashes, text, hashes)
return '\n%s %s\n\n' % (hashes, text)
return '%s %s %s\n\n' % (hashes, text, hashes)
return '%s %s\n\n' % (hashes, text)
def convert_hr(self, el, text, convert_as_inline):
return '\n\n---\n\n'
@@ -358,8 +317,8 @@ class MarkdownConverter(object):
el = el.parent
if nested:
# remove trailing newline if nested
return '\n' + text.rstrip()
return '\n\n' + text + ('\n' if before_paragraph else '')
return '\n' + self.indent(text, 1).rstrip()
return text + ('\n' if before_paragraph else '')
convert_ul = convert_list
convert_ol = convert_list
@@ -380,33 +339,17 @@ class MarkdownConverter(object):
el = el.parent
bullets = self.options['bullets']
bullet = bullets[depth % len(bullets)]
bullet = bullet + ' '
text = (text or '').strip()
text = self.indent(text, len(bullet))
if text:
text = bullet + text[len(bullet):]
return '%s\n' % text
return '%s %s\n' % (bullet, (text or '').strip())
def convert_p(self, el, text, convert_as_inline):
if convert_as_inline:
return ' ' + text.strip() + ' '
return text
if self.options['wrap']:
# Preserve newlines (and preceding whitespace) resulting
# from <br> tags. Newlines in the input have already been
# replaced by spaces.
lines = text.split('\n')
new_lines = []
for line in lines:
line = line.lstrip()
line_no_trailing = line.rstrip()
trailing = line[len(line_no_trailing):]
line = fill(line,
width=self.options['wrap_width'],
break_long_words=False,
break_on_hyphens=False)
new_lines.append(line + trailing)
text = '\n'.join(new_lines)
return '\n\n%s\n\n' % text if text else ''
text = fill(text,
width=self.options['wrap_width'],
break_long_words=False,
break_on_hyphens=False)
return '%s\n\n' % text if text else ''
def convert_pre(self, el, text, convert_as_inline):
if not text:

View File

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "markdownify"
version = "0.14.0"
version = "0.13.0"
authors = [{name = "Matthew Tretter", email = "m@tthewwithanm.com"}]
description = "Convert HTML to markdown."
readme = "README.rst"

View File

@@ -14,7 +14,7 @@ def test_chomp():
def test_nested():
text = md('<p>This is an <a href="http://example.com/">example link</a>.</p>')
assert text == '\n\nThis is an [example link](http://example.com/).\n\n'
assert text == 'This is an [example link](http://example.com/).\n\n'
def test_ignore_comments():

View File

@@ -11,4 +11,3 @@ def test_soup():
def test_whitespace():
assert md(' a b \t\t c ') == ' a b c '
assert md(' a b \n\n c ') == ' a b\nc '

View File

@@ -1,4 +1,4 @@
from markdownify import markdownify as md, ATX, ATX_CLOSED, BACKSLASH, SPACES, UNDERSCORE
from markdownify import markdownify as md, ATX, ATX_CLOSED, BACKSLASH, UNDERSCORE
def inline_tests(tag, markup):
@@ -66,7 +66,7 @@ def test_blockquote_with_paragraph():
def test_blockquote_nested():
text = md('<blockquote>And she was like <blockquote>Hello</blockquote></blockquote>')
assert text == '\n> And she was like\n> > Hello\n\n'
assert text == '\n> And she was like \n> > Hello\n\n'
def test_br():
@@ -112,39 +112,36 @@ def test_em():
def test_header_with_space():
assert md('<h3>\n\nHello</h3>') == '\n### Hello\n\n'
assert md('<h3>Hello\n\n\nWorld</h3>') == '\n### Hello World\n\n'
assert md('<h4>\n\nHello</h4>') == '\n#### Hello\n\n'
assert md('<h5>\n\nHello</h5>') == '\n##### Hello\n\n'
assert md('<h5>\n\nHello\n\n</h5>') == '\n##### Hello\n\n'
assert md('<h5>\n\nHello \n\n</h5>') == '\n##### Hello\n\n'
assert md('<h3>\n\nHello</h3>') == '### Hello\n\n'
assert md('<h4>\n\nHello</h4>') == '#### Hello\n\n'
assert md('<h5>\n\nHello</h5>') == '##### Hello\n\n'
assert md('<h5>\n\nHello\n\n</h5>') == '##### Hello\n\n'
assert md('<h5>\n\nHello \n\n</h5>') == '##### Hello\n\n'
def test_h1():
assert md('<h1>Hello</h1>') == '\n\nHello\n=====\n\n'
assert md('<h1>Hello</h1>') == 'Hello\n=====\n\n'
def test_h2():
assert md('<h2>Hello</h2>') == '\n\nHello\n-----\n\n'
assert md('<h2>Hello</h2>') == 'Hello\n-----\n\n'
def test_hn():
assert md('<h3>Hello</h3>') == '\n### Hello\n\n'
assert md('<h4>Hello</h4>') == '\n#### Hello\n\n'
assert md('<h5>Hello</h5>') == '\n##### Hello\n\n'
assert md('<h6>Hello</h6>') == '\n###### Hello\n\n'
assert md('<h3>Hello</h3>') == '### Hello\n\n'
assert md('<h4>Hello</h4>') == '#### Hello\n\n'
assert md('<h5>Hello</h5>') == '##### Hello\n\n'
assert md('<h6>Hello</h6>') == '###### Hello\n\n'
def test_hn_chained():
assert md('<h1>First</h1>\n<h2>Second</h2>\n<h3>Third</h3>', heading_style=ATX) == '\n# First\n\n## Second\n\n### Third\n\n'
assert md('X<h1>First</h1>', heading_style=ATX) == 'X\n# First\n\n'
assert md('X<h1>First</h1>', heading_style=ATX_CLOSED) == 'X\n# First #\n\n'
assert md('X<h1>First</h1>') == 'X\n\nFirst\n=====\n\n'
assert md('<h1>First</h1>\n<h2>Second</h2>\n<h3>Third</h3>', heading_style=ATX) == '# First\n\n\n## Second\n\n\n### Third\n\n'
assert md('X<h1>First</h1>', heading_style=ATX) == 'X# First\n\n'
def test_hn_nested_tag_heading_style():
assert md('<h1>A <p>P</p> C </h1>', heading_style=ATX_CLOSED) == '\n# A P C #\n\n'
assert md('<h1>A <p>P</p> C </h1>', heading_style=ATX) == '\n# A P C\n\n'
assert md('<h1>A <p>P</p> C </h1>', heading_style=ATX_CLOSED) == '# A P C #\n\n'
assert md('<h1>A <p>P</p> C </h1>', heading_style=ATX) == '# A P C\n\n'
def test_hn_nested_simple_tag():
@@ -160,12 +157,12 @@ def test_hn_nested_simple_tag():
]
for tag, markdown in tag_to_markdown:
assert md('<h3>A <' + tag + '>' + tag + '</' + tag + '> B</h3>') == '\n### A ' + markdown + ' B\n\n'
assert md('<h3>A <' + tag + '>' + tag + '</' + tag + '> B</h3>') == '### A ' + markdown + ' B\n\n'
assert md('<h3>A <br>B</h3>', heading_style=ATX) == '\n### A B\n\n'
assert md('<h3>A <br>B</h3>', heading_style=ATX) == '### A B\n\n'
# Nested lists not supported
# assert md('<h3>A <ul><li>li1</i><li>l2</li></ul></h3>', heading_style=ATX) == '\n### A li1 li2 B\n\n'
# assert md('<h3>A <ul><li>li1</i><li>l2</li></ul></h3>', heading_style=ATX) == '### A li1 li2 B\n\n'
def test_hn_nested_img():
@@ -175,18 +172,18 @@ def test_hn_nested_img():
("alt='Alt Text' title='Optional title'", "Alt Text", " \"Optional title\""),
]
for image_attributes, markdown, title in image_attributes_to_markdown:
assert md('<h3>A <img src="/path/to/img.jpg" ' + image_attributes + '/> B</h3>') == '\n### A' + (' ' + markdown + ' ' if markdown else ' ') + 'B\n\n'
assert md('<h3>A <img src="/path/to/img.jpg" ' + image_attributes + '/> B</h3>', keep_inline_images_in=['h3']) == '\n### A ![' + markdown + '](/path/to/img.jpg' + title + ') B\n\n'
assert md('<h3>A <img src="/path/to/img.jpg" ' + image_attributes + '/> B</h3>') == '### A ' + markdown + ' B\n\n'
assert md('<h3>A <img src="/path/to/img.jpg" ' + image_attributes + '/> B</h3>', keep_inline_images_in=['h3']) == '### A ![' + markdown + '](/path/to/img.jpg' + title + ') B\n\n'
def test_hn_atx_headings():
assert md('<h1>Hello</h1>', heading_style=ATX) == '\n# Hello\n\n'
assert md('<h2>Hello</h2>', heading_style=ATX) == '\n## Hello\n\n'
assert md('<h1>Hello</h1>', heading_style=ATX) == '# Hello\n\n'
assert md('<h2>Hello</h2>', heading_style=ATX) == '## Hello\n\n'
def test_hn_atx_closed_headings():
assert md('<h1>Hello</h1>', heading_style=ATX_CLOSED) == '\n# Hello #\n\n'
assert md('<h2>Hello</h2>', heading_style=ATX_CLOSED) == '\n## Hello ##\n\n'
assert md('<h1>Hello</h1>', heading_style=ATX_CLOSED) == '# Hello #\n\n'
assert md('<h2>Hello</h2>', heading_style=ATX_CLOSED) == '## Hello ##\n\n'
def test_head():
@@ -196,7 +193,7 @@ def test_head():
def test_hr():
assert md('Hello<hr>World') == 'Hello\n\n---\n\nWorld'
assert md('Hello<hr />World') == 'Hello\n\n---\n\nWorld'
assert md('<p>Hello</p>\n<hr>\n<p>World</p>') == '\n\nHello\n\n---\n\nWorld\n\n'
assert md('<p>Hello</p>\n<hr>\n<p>World</p>') == 'Hello\n\n\n\n\n---\n\n\nWorld\n\n'
def test_i():
@@ -213,23 +210,12 @@ def test_kbd():
def test_p():
assert md('<p>hello</p>') == '\n\nhello\n\n'
assert md('<p>123456789 123456789</p>') == '\n\n123456789 123456789\n\n'
assert md('<p>123456789\n\n\n123456789</p>') == '\n\n123456789\n123456789\n\n'
assert md('<p>123456789\n\n\n123456789</p>', wrap=True, wrap_width=80) == '\n\n123456789 123456789\n\n'
assert md('<p>123456789 123456789</p>', wrap=True, wrap_width=10) == '\n\n123456789\n123456789\n\n'
assert md('<p><a href="https://example.com">Some long link</a></p>', wrap=True, wrap_width=10) == '\n\n[Some long\nlink](https://example.com)\n\n'
assert md('<p>12345<br />67890</p>', wrap=True, wrap_width=10, newline_style=BACKSLASH) == '\n\n12345\\\n67890\n\n'
assert md('<p>12345<br />67890</p>', wrap=True, wrap_width=50, newline_style=BACKSLASH) == '\n\n12345\\\n67890\n\n'
assert md('<p>12345<br />67890</p>', wrap=True, wrap_width=10, newline_style=SPACES) == '\n\n12345 \n67890\n\n'
assert md('<p>12345<br />67890</p>', wrap=True, wrap_width=50, newline_style=SPACES) == '\n\n12345 \n67890\n\n'
assert md('<p>12345678901<br />12345</p>', wrap=True, wrap_width=10, newline_style=BACKSLASH) == '\n\n12345678901\\\n12345\n\n'
assert md('<p>12345678901<br />12345</p>', wrap=True, wrap_width=50, newline_style=BACKSLASH) == '\n\n12345678901\\\n12345\n\n'
assert md('<p>12345678901<br />12345</p>', wrap=True, wrap_width=10, newline_style=SPACES) == '\n\n12345678901 \n12345\n\n'
assert md('<p>12345678901<br />12345</p>', wrap=True, wrap_width=50, newline_style=SPACES) == '\n\n12345678901 \n12345\n\n'
assert md('<p>1234 5678 9012<br />67890</p>', wrap=True, wrap_width=10, newline_style=BACKSLASH) == '\n\n1234 5678\n9012\\\n67890\n\n'
assert md('<p>1234 5678 9012<br />67890</p>', wrap=True, wrap_width=10, newline_style=SPACES) == '\n\n1234 5678\n9012 \n67890\n\n'
assert md('First<p>Second</p><p>Third</p>Fourth') == 'First\n\nSecond\n\nThird\n\nFourth'
assert md('<p>hello</p>') == 'hello\n\n'
assert md('<p>123456789 123456789</p>') == '123456789 123456789\n\n'
assert md('<p>123456789 123456789</p>', wrap=True, wrap_width=10) == '123456789\n123456789\n\n'
assert md('<p><a href="https://example.com">Some long link</a></p>', wrap=True, wrap_width=10) == '[Some long\nlink](https://example.com)\n\n'
assert md('<p>12345<br />67890</p>', wrap=True, wrap_width=10, newline_style=BACKSLASH) == '12345\\\n67890\n\n'
assert md('<p>12345678901<br />12345</p>', wrap=True, wrap_width=10, newline_style=BACKSLASH) == '12345678901\\\n12345\n\n'
def test_pre():
@@ -303,13 +289,3 @@ def test_lang_callback():
assert md('<pre class="python">test\n foo\nbar</pre>', code_language_callback=callback) == '\n```python\ntest\n foo\nbar\n```\n'
assert md('<pre class="javascript"><code>test\n foo\nbar</code></pre>', code_language_callback=callback) == '\n```javascript\ntest\n foo\nbar\n```\n'
assert md('<pre class="javascript"><code class="javascript">test\n foo\nbar</code></pre>', code_language_callback=callback) == '\n```javascript\ntest\n foo\nbar\n```\n'
def test_spaces():
assert md('<p> a b </p> <p> c d </p>') == '\n\na b\n\nc d\n\n'
assert md('<p> <i>a</i> </p>') == '\n\n*a*\n\n'
assert md('test <p> again </p>') == 'test\n\nagain\n\n'
assert md('test <blockquote> text </blockquote> after') == 'test\n> text\n\nafter'
assert md(' <ol> <li> x </li> <li> y </li> </ol> ') == '\n\n1. x\n2. y\n'
assert md(' <ul> <li> x </li> <li> y </li> </ol> ') == '\n\n* x\n* y\n'
assert md('test <pre> foo </pre> bar') == 'test\n```\n foo \n```\nbar'

View File

@@ -1,5 +1,3 @@
import warnings
from bs4 import MarkupResemblesLocatorWarning
from markdownify import markdownify as md
@@ -14,7 +12,7 @@ def test_underscore():
def test_xml_entities():
assert md('&amp;', escape_misc=True) == r'\&'
assert md('&amp;') == r'\&'
def test_named_entities():
@@ -27,49 +25,23 @@ def test_hexadecimal_entities():
def test_single_escaping_entities():
assert md('&amp;amp;', escape_misc=True) == r'\&amp;'
assert md('&amp;amp;') == r'\&amp;'
def test_misc():
# ignore the bs4 warning that "1.2" or "*" looks like a filename
warnings.filterwarnings("ignore", category=MarkupResemblesLocatorWarning)
assert md('\\*', escape_misc=True) == r'\\\*'
assert md('&lt;foo>', escape_misc=True) == r'\<foo\>'
assert md('# foo', escape_misc=True) == r'\# foo'
assert md('#5', escape_misc=True) == r'#5'
assert md('5#', escape_misc=True) == '5#'
assert md('####### foo', escape_misc=True) == r'####### foo'
assert md('> foo', escape_misc=True) == r'\> foo'
assert md('~~foo~~', escape_misc=True) == r'\~\~foo\~\~'
assert md('foo\n===\n', escape_misc=True) == 'foo\n\\=\\=\\=\n'
assert md('---\n', escape_misc=True) == '\\---\n'
assert md('- test', escape_misc=True) == r'\- test'
assert md('x - y', escape_misc=True) == r'x \- y'
assert md('test-case', escape_misc=True) == 'test-case'
assert md('x-', escape_misc=True) == 'x-'
assert md('-y', escape_misc=True) == '-y'
assert md('+ x\n+ y\n', escape_misc=True) == '\\+ x\n\\+ y\n'
assert md('`x`', escape_misc=True) == r'\`x\`'
assert md('[text](link)', escape_misc=True) == r'\[text](link)'
assert md('1. x', escape_misc=True) == r'1\. x'
# assert md('1<span>.</span> x', escape_misc=True) == r'1\. x'
assert md('<span>1.</span> x', escape_misc=True) == r'1\. x'
assert md(' 1. x', escape_misc=True) == r' 1\. x'
assert md('123456789. x', escape_misc=True) == r'123456789\. x'
assert md('1234567890. x', escape_misc=True) == r'1234567890. x'
assert md('A1. x', escape_misc=True) == r'A1. x'
assert md('1.2', escape_misc=True) == r'1.2'
assert md('not a number. x', escape_misc=True) == r'not a number. x'
assert md('1) x', escape_misc=True) == r'1\) x'
# assert md('1<span>)</span> x', escape_misc=True) == r'1\) x'
assert md('<span>1)</span> x', escape_misc=True) == r'1\) x'
assert md(' 1) x', escape_misc=True) == r' 1\) x'
assert md('123456789) x', escape_misc=True) == r'123456789\) x'
assert md('1234567890) x', escape_misc=True) == r'1234567890) x'
assert md('(1) x', escape_misc=True) == r'(1) x'
assert md('A1) x', escape_misc=True) == r'A1) x'
assert md('1)x', escape_misc=True) == r'1)x'
assert md('not a number) x', escape_misc=True) == r'not a number) x'
assert md('|not table|', escape_misc=True) == r'\|not table\|'
assert md(r'\ &lt;foo> &amp;amp; | ` `', escape_misc=False) == r'\ <foo> &amp; | ` `'
def text_misc():
assert md('\\*') == r'\\\*'
assert md('<foo>') == r'\<foo\>'
assert md('# foo') == r'\# foo'
assert md('> foo') == r'\> foo'
assert md('~~foo~~') == r'\~\~foo\~\~'
assert md('foo\n===\n') == 'foo\n\\=\\=\\=\n'
assert md('---\n') == '\\-\\-\\-\n'
assert md('+ x\n+ y\n') == '\\+ x\n\\+ y\n'
assert md('`x`') == r'\`x\`'
assert md('[text](link)') == r'\[text](link)'
assert md('1. x') == r'1\. x'
assert md('not a number. x') == r'not a number. x'
assert md('1) x') == r'1\) x'
assert md('not a number) x') == r'not a number) x'
assert md('|not table|') == r'\|not table\|'
assert md(r'\ <foo> &amp;amp; | ` `', escape_misc=False) == r'\ <foo> &amp; | ` `'

View File

@@ -41,21 +41,19 @@ nested_ols = """
def test_ol():
assert md('<ol><li>a</li><li>b</li></ol>') == '\n\n1. a\n2. b\n'
assert md('<ol start="3"><li>a</li><li>b</li></ol>') == '\n\n3. a\n4. b\n'
assert md('foo<ol start="3"><li>a</li><li>b</li></ol>bar') == 'foo\n\n3. a\n4. b\n\nbar'
assert md('<ol start="-1"><li>a</li><li>b</li></ol>') == '\n\n1. a\n2. b\n'
assert md('<ol start="foo"><li>a</li><li>b</li></ol>') == '\n\n1. a\n2. b\n'
assert md('<ol start="1.5"><li>a</li><li>b</li></ol>') == '\n\n1. a\n2. b\n'
assert md('<ol start="1234"><li><p>first para</p><p>second para</p></li><li><p>third para</p><p>fourth para</p></li></ol>') == '\n\n1234. first para\n \n second para\n1235. third para\n \n fourth para\n'
assert md('<ol><li>a</li><li>b</li></ol>') == '1. a\n2. b\n'
assert md('<ol start="3"><li>a</li><li>b</li></ol>') == '3. a\n4. b\n'
assert md('<ol start="-1"><li>a</li><li>b</li></ol>') == '1. a\n2. b\n'
assert md('<ol start="foo"><li>a</li><li>b</li></ol>') == '1. a\n2. b\n'
assert md('<ol start="1.5"><li>a</li><li>b</li></ol>') == '1. a\n2. b\n'
def test_nested_ols():
assert md(nested_ols) == '\n\n1. 1\n 1. a\n 1. I\n 2. II\n 3. III\n 2. b\n 3. c\n2. 2\n3. 3\n'
assert md(nested_ols) == '\n1. 1\n\t1. a\n\t\t1. I\n\t\t2. II\n\t\t3. III\n\t2. b\n\t3. c\n2. 2\n3. 3\n'
def test_ul():
assert md('<ul><li>a</li><li>b</li></ul>') == '\n\n* a\n* b\n'
assert md('<ul><li>a</li><li>b</li></ul>') == '* a\n* b\n'
assert md("""<ul>
<li>
a
@@ -63,13 +61,11 @@ def test_ul():
<li> b </li>
<li> c
</li>
</ul>""") == '\n\n* a\n* b\n* c\n'
assert md('<ul><li><p>first para</p><p>second para</p></li><li><p>third para</p><p>fourth para</p></li></ul>') == '\n\n* first para\n \n second para\n* third para\n \n fourth para\n'
</ul>""") == '* a\n* b\n* c\n'
def test_inline_ul():
assert md('<p>foo</p><ul><li>a</li><li>b</li></ul><p>bar</p>') == '\n\nfoo\n\n* a\n* b\n\nbar\n\n'
assert md('foo<ul><li>bar</li></ul>baz') == 'foo\n\n* bar\n\nbaz'
assert md('<p>foo</p><ul><li>a</li><li>b</li></ul><p>bar</p>') == 'foo\n\n* a\n* b\n\nbar\n\n'
def test_nested_uls():
@@ -77,12 +73,12 @@ def test_nested_uls():
Nested ULs should alternate bullet characters.
"""
assert md(nested_uls) == '\n\n* 1\n + a\n - I\n - II\n - III\n + b\n + c\n* 2\n* 3\n'
assert md(nested_uls) == '\n* 1\n\t+ a\n\t\t- I\n\t\t- II\n\t\t- III\n\t+ b\n\t+ c\n* 2\n* 3\n'
def test_bullets():
assert md(nested_uls, bullets='-') == '\n\n- 1\n - a\n - I\n - II\n - III\n - b\n - c\n- 2\n- 3\n'
assert md(nested_uls, bullets='-') == '\n- 1\n\t- a\n\t\t- I\n\t\t- II\n\t\t- III\n\t- b\n\t- c\n- 2\n- 3\n'
def test_li_text():
assert md('<ul><li>foo <a href="#">bar</a></li><li>foo bar </li><li>foo <b>bar</b> <i>space</i>.</ul>') == '\n\n* foo [bar](#)\n* foo bar\n* foo **bar** *space*.\n'
assert md('<ul><li>foo <a href="#">bar</a></li><li>foo bar </li><li>foo <b>bar</b> <i>space</i>.</ul>') == '* foo [bar](#)\n* foo bar\n* foo **bar** *space*.\n'

View File

@@ -242,7 +242,7 @@ def test_table():
assert md(table) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
assert md(table_with_html_content) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| **Jill** | *Smith* | [50](#) |\n| Eve | Jackson | 94 |\n\n'
assert md(table_with_paragraphs) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
assert md(table_with_linebreaks) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith Jackson | 50 |\n| Eve | Jackson Smith | 94 |\n\n'
assert md(table_with_linebreaks) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith Jackson | 50 |\n| Eve | Jackson Smith | 94 |\n\n'
assert md(table_with_header_column) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
assert md(table_head_body) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
assert md(table_head_body_missing_head) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'