There are various cases in which inline text fails to be separated by (sufficiently many) newlines from adjacent block content. A paragraph needs a blank line (two newlines) separating it from prior text, as does an underlined (Setext) header; an ATX header needs a single newline separating it from prior text. A list needs at least one newline separating it from prior text, but in general two newlines (an ordered list starting at a number other than 1 is only recognized when preceded by a blank line). To avoid accumulating more newlines than necessary, redundant newlines are removed when concatenating the results of converting consecutive tags: the greater of the number of newlines ending the prior text and the number starting the subsequent text is kept. This is thus an alternative to #108 that tries to avoid the excess newline accumulation that was a concern there, while also fixing more cases than just paragraphs and updating the tests. Fixes #92. Fixes #98.
40 lines
1.2 KiB
Python
40 lines
1.2 KiB
Python
from markdownify import markdownify as md
|
|
|
|
|
|
def test_chomp():
    """Check how ``md`` treats whitespace around and inside a ``<b>`` tag.

    Each entry pairs an HTML input with the Markdown expected from ``md``;
    the pairs are checked in the order listed.
    """
    # NOTE(review): several inputs below are textually identical as shown
    # here; runs of spaces may have been collapsed before this file reached
    # review -- confirm the literals against the upstream test module.
    cases = (
        (' <b></b> ', ' '),
        (' <b> </b> ', ' '),
        (' <b> </b> ', ' '),
        (' <b> </b> ', ' '),
        (' <b>s </b> ', ' **s** '),
        (' <b> s</b> ', ' **s** '),
        (' <b> s </b> ', ' **s** '),
        (' <b> s </b> ', ' **s** '),
    )
    for html, expected in cases:
        assert md(html) == expected
|
|
|
|
|
|
def test_nested():
    """A link nested in a paragraph becomes an inline link in a paragraph."""
    html = '<p>This is an <a href="http://example.com/">example link</a>.</p>'
    expected = '\n\nThis is an [example link](http://example.com/).\n\n'
    assert md(html) == expected
|
|
|
|
|
|
def test_ignore_comments():
    """Input holding only an HTML comment converts to the empty string."""
    assert md("<!-- This is a comment -->") == ""
|
|
|
|
|
|
def test_ignore_comments_with_other_tags():
    """A leading HTML comment is dropped; the link after it is converted."""
    html = "<!-- This is a comment --><a href='http://example.com/'>example link</a>"
    converted = md(html)
    assert converted == "[example link](http://example.com/)"
|
|
|
|
|
|
def test_code_with_tricky_content():
    """Check ``<code>`` conversion for awkward contents and neighbours.

    Covers a bare ``>`` inside code, code immediately followed by bold text,
    and a ``<br />`` inside code surrounded by plain text.
    """
    # NOTE(review): the expected text for the <br /> case shows a single
    # space before the newline; whitespace may have been collapsed before
    # this file reached review -- confirm against the upstream test module.
    expectations = [
        ('<code>></code>', "`>`"),
        ('<code>/home/</code><b>username</b>', "`/home/`**username**"),
        (
            'First line <code>blah blah<br />blah blah</code> second line',
            "First line `blah blah \nblah blah` second line",
        ),
    ]
    for html, markdown in expectations:
        assert md(html) == markdown
|
|
|
|
|
|
def test_special_tags():
    """A doctype converts to nothing; a CDATA section keeps its text."""
    doctype_result = md('<!DOCTYPE html>')
    assert doctype_result == ''

    cdata_result = md('<![CDATA[foobar]]>')
    assert cdata_result == 'foobar'
|