use static version instead of dynamic git tag info

test build in develop and pull requests
Merge branch 'pyproject.toml' of https://github.com/KOLANICH-libs/markdownify.py into KOLANICH-libs-pyproject.toml
2024-07-14 22:34:30 +02:00 · 2024-07-14 22:10:01 +02:00 · 2024-07-14 21:53:09 +02:00 · 2022-11-10 15:29:25 +03:00 · 2022-11-10 15:27:26 +03:00 · 2022-11-10 15:25:39 +03:00
9 changed files with 117 additions and 231 deletions
--- a/README.rst
+++ b/README.rst
@@ -128,9 +128,9 @@ escape_underscores
  Defaults to ``True``.

 escape_misc
-  If set to ``True``, escape miscellaneous punctuation characters
+  If set to ``False``, do not escape miscellaneous punctuation characters
  that sometimes have Markdown significance in text.
-  Defaults to ``False``.
+  Defaults to ``True``.

 keep_inline_images_in
  Images are converted to their alt-text when the images are located inside
--- a/markdownify/init.py
+++ b/markdownify/init.py
@@ -7,8 +7,7 @@ import six
 convert_heading_re = re.compile(r'convert_h(\d+)')
 line_beginning_re = re.compile(r'^', re.MULTILINE)
 whitespace_re = re.compile(r'[\t ]+')
-all_whitespace_re = re.compile(r'[\t \r\n]+')
-newline_whitespace_re = re.compile(r'[\t \r\n]*[\r\n][\t \r\n]*')
+all_whitespace_re = re.compile(r'[\s]+')
 html_heading_re = re.compile(r'h[1-6]')


@@ -67,23 +66,6 @@ def _todict(obj):
    return dict((k, getattr(obj, k)) for k in dir(obj) if not k.startswith('_'))


-def should_remove_whitespace_inside(el):
-    """Return to remove whitespace immediately inside a block-level element."""
-    if not el or not el.name:
-        return False
-    if html_heading_re.match(el.name) is not None:
-        return True
-    return el.name in ('p', 'blockquote',
-                       'ol', 'ul', 'li',
-                       'table', 'thead', 'tbody', 'tfoot',
-                       'tr', 'td', 'th')
-
-
-def should_remove_whitespace_outside(el):
-    """Return to remove whitespace immediately outside a block-level element."""
-    return should_remove_whitespace_inside(el) or (el and el.name == 'pre')
-
-
 class MarkdownConverter(object):
    class DefaultOptions:
        autolinks = True
@@ -94,7 +76,7 @@ class MarkdownConverter(object):
        default_title = False
        escape_asterisks = True
        escape_underscores = True
-        escape_misc = False
+        escape_misc = True
        heading_style = UNDERLINED
        keep_inline_images_in = []
        newline_style = SPACES
@@ -137,23 +119,27 @@ class MarkdownConverter(object):
        if not children_only and (isHeading or isCell):
            convert_children_as_inline = True

-        # Remove whitespace-only textnodes just before, after or
-        # inside block-level elements.
-        should_remove_inside = should_remove_whitespace_inside(node)
-        for el in node.children:
-            # Only extract (remove) whitespace-only text node if any of the
-            # conditions is true:
-            # - el is the first element in its parent (block-level)
-            # - el is the last element in its parent (block-level)
-            # - el is adjacent to a block-level node
-            can_extract = (should_remove_inside and (not el.previous_sibling
-                                                     or not el.next_sibling)
-                           or should_remove_whitespace_outside(el.previous_sibling)
-                           or should_remove_whitespace_outside(el.next_sibling))
-            if (isinstance(el, NavigableString)
-                    and six.text_type(el).strip() == ''
-                    and can_extract):
-                el.extract()
+        # Remove whitespace-only textnodes in purely nested nodes
+        def is_nested_node(el):
+            return el and el.name in ['ol', 'ul', 'li',
+                                      'table', 'thead', 'tbody', 'tfoot',
+                                      'tr', 'td', 'th']
+
+        if is_nested_node(node):
+            for el in node.children:
+                # Only extract (remove) whitespace-only text node if any of the
+                # conditions is true:
+                # - el is the first element in its parent
+                # - el is the last element in its parent
+                # - el is adjacent to a nested node
+                can_extract = (not el.previous_sibling
+                               or not el.next_sibling
+                               or is_nested_node(el.previous_sibling)
+                               or is_nested_node(el.next_sibling))
+                if (isinstance(el, NavigableString)
+                        and six.text_type(el).strip() == ''
+                        and can_extract):
+                    el.extract()

        # Convert the children first
        for el in node.children:
@@ -162,13 +148,7 @@ class MarkdownConverter(object):
            elif isinstance(el, NavigableString):
                text += self.process_text(el)
            else:
-                text_strip = text.rstrip('\n')
-                newlines_left = len(text) - len(text_strip)
-                next_text = self.process_tag(el, convert_children_as_inline)
-                next_text_strip = next_text.lstrip('\n')
-                newlines_right = len(next_text) - len(next_text_strip)
-                newlines = '\n' * max(newlines_left, newlines_right)
-                text = text_strip + newlines + next_text_strip
+                text += self.process_tag(el, convert_children_as_inline)

        if not children_only:
            convert_fn = getattr(self, 'convert_%s' % node.name, None)
@@ -182,26 +162,18 @@ class MarkdownConverter(object):

        # normalize whitespace if we're not inside a preformatted element
        if not el.find_parent('pre'):
-            if self.options['wrap']:
-                text = all_whitespace_re.sub(' ', text)
-            else:
-                text = newline_whitespace_re.sub('\n', text)
-                text = whitespace_re.sub(' ', text)
+            text = whitespace_re.sub(' ', text)

        # escape special characters if we're not inside a preformatted or code element
        if not el.find_parent(['pre', 'code', 'kbd', 'samp']):
            text = self.escape(text)

-        # remove leading whitespace at the start or just after a
-        # block-level element; remove traliing whitespace at the end
-        # or just before a block-level element.
-        if (should_remove_whitespace_outside(el.previous_sibling)
-                or (should_remove_whitespace_inside(el.parent)
-                    and not el.previous_sibling)):
-            text = text.lstrip()
-        if (should_remove_whitespace_outside(el.next_sibling)
-                or (should_remove_whitespace_inside(el.parent)
-                    and not el.next_sibling)):
+        # remove trailing whitespace if any of the following conditions is true:
+        # - current text node is the last node in li
+        # - current text node is followed by an embedded list
+        if (el.parent.name == 'li'
+                and (not el.next_sibling
+                     or el.next_sibling.name in ['ul', 'ol'])):
            text = text.rstrip()

        return text
@@ -236,32 +208,20 @@ class MarkdownConverter(object):
        if not text:
            return ''
        if self.options['escape_misc']:
-            text = re.sub(r'([\\&<`[>~=+|])', r'\\\1', text)
-            # A sequence of one or more consecutive '-', preceded and
-            # followed by whitespace or start/end of fragment, might
-            # be confused with an underline of a header, or with a
-            # list marker.
-            text = re.sub(r'(\s|^)(-+(?:\s|$))', r'\1\\\2', text)
-            # A sequence of up to six consecutive '#', preceded and
-            # followed by whitespace or start/end of fragment, might
-            # be confused with an ATX heading.
-            text = re.sub(r'(\s|^)(#{1,6}(?:\s|$))', r'\1\\\2', text)
-            # '.' or ')' preceded by up to nine digits might be
-            # confused with a list item.
-            text = re.sub(r'((?:\s|^)[0-9]{1,9})([.)](?:\s|$))', r'\1\\\2',
-                          text)
+            text = re.sub(r'([\\&<`[>~#=+|-])', r'\\\1', text)
+            text = re.sub(r'([0-9])([.)])', r'\1\\\2', text)
        if self.options['escape_asterisks']:
            text = text.replace('*', r'\*')
        if self.options['escape_underscores']:
            text = text.replace('_', r'\_')
        return text

-    def indent(self, text, columns):
-        return line_beginning_re.sub(' ' * columns, text) if text else ''
+    def indent(self, text, level):
+        return line_beginning_re.sub('\t' * level, text) if text else ''

    def underline(self, text, pad_char):
        text = (text or '').rstrip()
-        return '\n\n%s\n%s\n\n' % (text, pad_char * len(text)) if text else ''
+        return '%s\n%s\n\n' % (text, pad_char * len(text)) if text else ''

    def convert_a(self, el, text, convert_as_inline):
        prefix, suffix, text = chomp(text)
@@ -286,7 +246,7 @@ class MarkdownConverter(object):
    def convert_blockquote(self, el, text, convert_as_inline):

        if convert_as_inline:
-            return ' ' + text.strip() + ' '
+            return text

        return '\n' + (line_beginning_re.sub('> ', text.strip()) + '\n\n') if text else ''

@@ -320,11 +280,10 @@ class MarkdownConverter(object):
        if style == UNDERLINED and n <= 2:
            line = '=' if n == 1 else '-'
            return self.underline(text, line)
-        text = all_whitespace_re.sub(' ', text)
        hashes = '#' * n
        if style == ATX_CLOSED:
-            return '\n%s %s %s\n\n' % (hashes, text, hashes)
-        return '\n%s %s\n\n' % (hashes, text)
+            return '%s %s %s\n\n' % (hashes, text, hashes)
+        return '%s %s\n\n' % (hashes, text)

    def convert_hr(self, el, text, convert_as_inline):
        return '\n\n---\n\n'
@@ -358,8 +317,8 @@ class MarkdownConverter(object):
            el = el.parent
        if nested:
            # remove trailing newline if nested
-            return '\n' + text.rstrip()
-        return '\n\n' + text + ('\n' if before_paragraph else '')
+            return '\n' + self.indent(text, 1).rstrip()
+        return text + ('\n' if before_paragraph else '')

    convert_ul = convert_list
    convert_ol = convert_list
@@ -380,33 +339,17 @@ class MarkdownConverter(object):
                el = el.parent
            bullets = self.options['bullets']
            bullet = bullets[depth % len(bullets)]
-        bullet = bullet + ' '
-        text = (text or '').strip()
-        text = self.indent(text, len(bullet))
-        if text:
-            text = bullet + text[len(bullet):]
-        return '%s\n' % text
+        return '%s %s\n' % (bullet, (text or '').strip())

    def convert_p(self, el, text, convert_as_inline):
        if convert_as_inline:
-            return ' ' + text.strip() + ' '
+            return text
        if self.options['wrap']:
-            # Preserve newlines (and preceding whitespace) resulting
-            # from <br> tags.  Newlines in the input have already been
-            # replaced by spaces.
-            lines = text.split('\n')
-            new_lines = []
-            for line in lines:
-                line = line.lstrip()
-                line_no_trailing = line.rstrip()
-                trailing = line[len(line_no_trailing):]
-                line = fill(line,
-                            width=self.options['wrap_width'],
-                            break_long_words=False,
-                            break_on_hyphens=False)
-                new_lines.append(line + trailing)
-            text = '\n'.join(new_lines)
-        return '\n\n%s\n\n' % text if text else ''
+            text = fill(text,
+                        width=self.options['wrap_width'],
+                        break_long_words=False,
+                        break_on_hyphens=False)
+        return '%s\n\n' % text if text else ''

    def convert_pre(self, el, text, convert_as_inline):
        if not text:
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "markdownify"
-version = "0.14.0"
+version = "0.13.0"
 authors = [{name = "Matthew Tretter", email = "m@tthewwithanm.com"}]
 description = "Convert HTML to markdown."
 readme = "README.rst"
--- a/tests/test_advanced.py
+++ b/tests/test_advanced.py
@@ -14,7 +14,7 @@ def test_chomp():

 def test_nested():
    text = md('<p>This is an <a href="http://example.com/">example link</a>.</p>')
-    assert text == '\n\nThis is an [example link](http://example.com/).\n\n'
+    assert text == 'This is an [example link](http://example.com/).\n\n'


 def test_ignore_comments():
--- a/tests/test_basic.py
+++ b/tests/test_basic.py
@@ -11,4 +11,3 @@ def test_soup():

 def test_whitespace():
    assert md(' a  b \t\t c ') == ' a b c '
-    assert md(' a  b \n\n c ') == ' a b\nc '
--- a/tests/test_conversions.py
+++ b/tests/test_conversions.py
@@ -1,4 +1,4 @@
-from markdownify import markdownify as md, ATX, ATX_CLOSED, BACKSLASH, SPACES, UNDERSCORE
+from markdownify import markdownify as md, ATX, ATX_CLOSED, BACKSLASH, UNDERSCORE


 def inline_tests(tag, markup):
@@ -66,7 +66,7 @@ def test_blockquote_with_paragraph():

 def test_blockquote_nested():
    text = md('<blockquote>And she was like <blockquote>Hello</blockquote></blockquote>')
-    assert text == '\n> And she was like\n> > Hello\n\n'
+    assert text == '\n> And she was like \n> > Hello\n\n'


 def test_br():
@@ -112,39 +112,36 @@ def test_em():


 def test_header_with_space():
-    assert md('<h3>\n\nHello</h3>') == '\n### Hello\n\n'
-    assert md('<h3>Hello\n\n\nWorld</h3>') == '\n### Hello World\n\n'
-    assert md('<h4>\n\nHello</h4>') == '\n#### Hello\n\n'
-    assert md('<h5>\n\nHello</h5>') == '\n##### Hello\n\n'
-    assert md('<h5>\n\nHello\n\n</h5>') == '\n##### Hello\n\n'
-    assert md('<h5>\n\nHello   \n\n</h5>') == '\n##### Hello\n\n'
+    assert md('<h3>\n\nHello</h3>') == '### Hello\n\n'
+    assert md('<h4>\n\nHello</h4>') == '#### Hello\n\n'
+    assert md('<h5>\n\nHello</h5>') == '##### Hello\n\n'
+    assert md('<h5>\n\nHello\n\n</h5>') == '##### Hello\n\n'
+    assert md('<h5>\n\nHello   \n\n</h5>') == '##### Hello\n\n'


 def test_h1():
-    assert md('<h1>Hello</h1>') == '\n\nHello\n=====\n\n'
+    assert md('<h1>Hello</h1>') == 'Hello\n=====\n\n'


 def test_h2():
-    assert md('<h2>Hello</h2>') == '\n\nHello\n-----\n\n'
+    assert md('<h2>Hello</h2>') == 'Hello\n-----\n\n'


 def test_hn():
-    assert md('<h3>Hello</h3>') == '\n### Hello\n\n'
-    assert md('<h4>Hello</h4>') == '\n#### Hello\n\n'
-    assert md('<h5>Hello</h5>') == '\n##### Hello\n\n'
-    assert md('<h6>Hello</h6>') == '\n###### Hello\n\n'
+    assert md('<h3>Hello</h3>') == '### Hello\n\n'
+    assert md('<h4>Hello</h4>') == '#### Hello\n\n'
+    assert md('<h5>Hello</h5>') == '##### Hello\n\n'
+    assert md('<h6>Hello</h6>') == '###### Hello\n\n'


 def test_hn_chained():
-    assert md('<h1>First</h1>\n<h2>Second</h2>\n<h3>Third</h3>', heading_style=ATX) == '\n# First\n\n## Second\n\n### Third\n\n'
-    assert md('X<h1>First</h1>', heading_style=ATX) == 'X\n# First\n\n'
-    assert md('X<h1>First</h1>', heading_style=ATX_CLOSED) == 'X\n# First #\n\n'
-    assert md('X<h1>First</h1>') == 'X\n\nFirst\n=====\n\n'
+    assert md('<h1>First</h1>\n<h2>Second</h2>\n<h3>Third</h3>', heading_style=ATX) == '# First\n\n\n## Second\n\n\n### Third\n\n'
+    assert md('X<h1>First</h1>', heading_style=ATX) == 'X# First\n\n'


 def test_hn_nested_tag_heading_style():
-    assert md('<h1>A <p>P</p> C </h1>', heading_style=ATX_CLOSED) == '\n# A P C #\n\n'
-    assert md('<h1>A <p>P</p> C </h1>', heading_style=ATX) == '\n# A P C\n\n'
+    assert md('<h1>A <p>P</p> C </h1>', heading_style=ATX_CLOSED) == '# A P C #\n\n'
+    assert md('<h1>A <p>P</p> C </h1>', heading_style=ATX) == '# A P C\n\n'


 def test_hn_nested_simple_tag():
@@ -160,12 +157,12 @@ def test_hn_nested_simple_tag():
    ]

    for tag, markdown in tag_to_markdown:
-        assert md('<h3>A <' + tag + '>' + tag + '</' + tag + '> B</h3>') == '\n### A ' + markdown + ' B\n\n'
+        assert md('<h3>A <' + tag + '>' + tag + '</' + tag + '> B</h3>') == '### A ' + markdown + ' B\n\n'

-    assert md('<h3>A <br>B</h3>', heading_style=ATX) == '\n### A B\n\n'
+    assert md('<h3>A <br>B</h3>', heading_style=ATX) == '### A B\n\n'

    # Nested lists not supported
-    # assert md('<h3>A <ul><li>li1</i><li>l2</li></ul></h3>', heading_style=ATX) == '\n### A li1 li2 B\n\n'
+    # assert md('<h3>A <ul><li>li1</i><li>l2</li></ul></h3>', heading_style=ATX) == '### A li1 li2 B\n\n'


 def test_hn_nested_img():
@@ -175,18 +172,18 @@ def test_hn_nested_img():
        ("alt='Alt Text' title='Optional title'", "Alt Text", " \"Optional title\""),
    ]
    for image_attributes, markdown, title in image_attributes_to_markdown:
-        assert md('<h3>A <img src="/path/to/img.jpg" ' + image_attributes + '/> B</h3>') == '\n### A' + (' ' + markdown + ' ' if markdown else ' ') + 'B\n\n'
-        assert md('<h3>A <img src="/path/to/img.jpg" ' + image_attributes + '/> B</h3>', keep_inline_images_in=['h3']) == '\n### A ![' + markdown + '](/path/to/img.jpg' + title + ') B\n\n'
+        assert md('<h3>A <img src="/path/to/img.jpg" ' + image_attributes + '/> B</h3>') == '### A ' + markdown + ' B\n\n'
+        assert md('<h3>A <img src="/path/to/img.jpg" ' + image_attributes + '/> B</h3>', keep_inline_images_in=['h3']) == '### A ![' + markdown + '](/path/to/img.jpg' + title + ') B\n\n'


 def test_hn_atx_headings():
-    assert md('<h1>Hello</h1>', heading_style=ATX) == '\n# Hello\n\n'
-    assert md('<h2>Hello</h2>', heading_style=ATX) == '\n## Hello\n\n'
+    assert md('<h1>Hello</h1>', heading_style=ATX) == '# Hello\n\n'
+    assert md('<h2>Hello</h2>', heading_style=ATX) == '## Hello\n\n'


 def test_hn_atx_closed_headings():
-    assert md('<h1>Hello</h1>', heading_style=ATX_CLOSED) == '\n# Hello #\n\n'
-    assert md('<h2>Hello</h2>', heading_style=ATX_CLOSED) == '\n## Hello ##\n\n'
+    assert md('<h1>Hello</h1>', heading_style=ATX_CLOSED) == '# Hello #\n\n'
+    assert md('<h2>Hello</h2>', heading_style=ATX_CLOSED) == '## Hello ##\n\n'


 def test_head():
@@ -196,7 +193,7 @@ def test_head():
 def test_hr():
    assert md('Hello<hr>World') == 'Hello\n\n---\n\nWorld'
    assert md('Hello<hr />World') == 'Hello\n\n---\n\nWorld'
-    assert md('<p>Hello</p>\n<hr>\n<p>World</p>') == '\n\nHello\n\n---\n\nWorld\n\n'
+    assert md('<p>Hello</p>\n<hr>\n<p>World</p>') == 'Hello\n\n\n\n\n---\n\n\nWorld\n\n'


 def test_i():
@@ -213,23 +210,12 @@ def test_kbd():


 def test_p():
-    assert md('<p>hello</p>') == '\n\nhello\n\n'
-    assert md('<p>123456789 123456789</p>') == '\n\n123456789 123456789\n\n'
-    assert md('<p>123456789\n\n\n123456789</p>') == '\n\n123456789\n123456789\n\n'
-    assert md('<p>123456789\n\n\n123456789</p>', wrap=True, wrap_width=80) == '\n\n123456789 123456789\n\n'
-    assert md('<p>123456789 123456789</p>', wrap=True, wrap_width=10) == '\n\n123456789\n123456789\n\n'
-    assert md('<p><a href="https://example.com">Some long link</a></p>', wrap=True, wrap_width=10) == '\n\n[Some long\nlink](https://example.com)\n\n'
-    assert md('<p>12345<br />67890</p>', wrap=True, wrap_width=10, newline_style=BACKSLASH) == '\n\n12345\\\n67890\n\n'
-    assert md('<p>12345<br />67890</p>', wrap=True, wrap_width=50, newline_style=BACKSLASH) == '\n\n12345\\\n67890\n\n'
-    assert md('<p>12345<br />67890</p>', wrap=True, wrap_width=10, newline_style=SPACES) == '\n\n12345  \n67890\n\n'
-    assert md('<p>12345<br />67890</p>', wrap=True, wrap_width=50, newline_style=SPACES) == '\n\n12345  \n67890\n\n'
-    assert md('<p>12345678901<br />12345</p>', wrap=True, wrap_width=10, newline_style=BACKSLASH) == '\n\n12345678901\\\n12345\n\n'
-    assert md('<p>12345678901<br />12345</p>', wrap=True, wrap_width=50, newline_style=BACKSLASH) == '\n\n12345678901\\\n12345\n\n'
-    assert md('<p>12345678901<br />12345</p>', wrap=True, wrap_width=10, newline_style=SPACES) == '\n\n12345678901  \n12345\n\n'
-    assert md('<p>12345678901<br />12345</p>', wrap=True, wrap_width=50, newline_style=SPACES) == '\n\n12345678901  \n12345\n\n'
-    assert md('<p>1234 5678 9012<br />67890</p>', wrap=True, wrap_width=10, newline_style=BACKSLASH) == '\n\n1234 5678\n9012\\\n67890\n\n'
-    assert md('<p>1234 5678 9012<br />67890</p>', wrap=True, wrap_width=10, newline_style=SPACES) == '\n\n1234 5678\n9012  \n67890\n\n'
-    assert md('First<p>Second</p><p>Third</p>Fourth') == 'First\n\nSecond\n\nThird\n\nFourth'
+    assert md('<p>hello</p>') == 'hello\n\n'
+    assert md('<p>123456789 123456789</p>') == '123456789 123456789\n\n'
+    assert md('<p>123456789 123456789</p>', wrap=True, wrap_width=10) == '123456789\n123456789\n\n'
+    assert md('<p><a href="https://example.com">Some long link</a></p>', wrap=True, wrap_width=10) == '[Some long\nlink](https://example.com)\n\n'
+    assert md('<p>12345<br />67890</p>', wrap=True, wrap_width=10, newline_style=BACKSLASH) == '12345\\\n67890\n\n'
+    assert md('<p>12345678901<br />12345</p>', wrap=True, wrap_width=10, newline_style=BACKSLASH) == '12345678901\\\n12345\n\n'


 def test_pre():
@@ -303,13 +289,3 @@ def test_lang_callback():
    assert md('<pre class="python">test\n    foo\nbar</pre>', code_language_callback=callback) == '\n```python\ntest\n    foo\nbar\n```\n'
    assert md('<pre class="javascript"><code>test\n    foo\nbar</code></pre>', code_language_callback=callback) == '\n```javascript\ntest\n    foo\nbar\n```\n'
    assert md('<pre class="javascript"><code class="javascript">test\n    foo\nbar</code></pre>', code_language_callback=callback) == '\n```javascript\ntest\n    foo\nbar\n```\n'
-
-
-def test_spaces():
-    assert md('<p> a b </p> <p> c d </p>') == '\n\na b\n\nc d\n\n'
-    assert md('<p> <i>a</i> </p>') == '\n\n*a*\n\n'
-    assert md('test <p> again </p>') == 'test\n\nagain\n\n'
-    assert md('test <blockquote> text </blockquote> after') == 'test\n> text\n\nafter'
-    assert md(' <ol> <li> x </li> <li> y </li> </ol> ') == '\n\n1. x\n2. y\n'
-    assert md(' <ul> <li> x </li> <li> y </li> </ol> ') == '\n\n* x\n* y\n'
-    assert md('test <pre> foo </pre> bar') == 'test\n```\n foo \n```\nbar'
--- a/tests/test_escaping.py
+++ b/tests/test_escaping.py
@@ -1,5 +1,3 @@
-import warnings
-from bs4 import MarkupResemblesLocatorWarning
 from markdownify import markdownify as md


@@ -14,7 +12,7 @@ def test_underscore():


 def test_xml_entities():
-    assert md('&amp;', escape_misc=True) == r'\&'
+    assert md('&amp;') == r'\&'


 def test_named_entities():
@@ -27,49 +25,23 @@ def test_hexadecimal_entities():


 def test_single_escaping_entities():
-    assert md('&amp;amp;', escape_misc=True) == r'\&amp;'
+    assert md('&amp;amp;') == r'\&amp;'


-def test_misc():
-    # ignore the bs4 warning that "1.2" or "*" looks like a filename
-    warnings.filterwarnings("ignore", category=MarkupResemblesLocatorWarning)
-
-    assert md('\\*', escape_misc=True) == r'\\\*'
-    assert md('&lt;foo>', escape_misc=True) == r'\<foo\>'
-    assert md('# foo', escape_misc=True) == r'\# foo'
-    assert md('#5', escape_misc=True) == r'#5'
-    assert md('5#', escape_misc=True) == '5#'
-    assert md('####### foo', escape_misc=True) == r'####### foo'
-    assert md('> foo', escape_misc=True) == r'\> foo'
-    assert md('~~foo~~', escape_misc=True) == r'\~\~foo\~\~'
-    assert md('foo\n===\n', escape_misc=True) == 'foo\n\\=\\=\\=\n'
-    assert md('---\n', escape_misc=True) == '\\---\n'
-    assert md('- test', escape_misc=True) == r'\- test'
-    assert md('x - y', escape_misc=True) == r'x \- y'
-    assert md('test-case', escape_misc=True) == 'test-case'
-    assert md('x-', escape_misc=True) == 'x-'
-    assert md('-y', escape_misc=True) == '-y'
-    assert md('+ x\n+ y\n', escape_misc=True) == '\\+ x\n\\+ y\n'
-    assert md('`x`', escape_misc=True) == r'\`x\`'
-    assert md('[text](link)', escape_misc=True) == r'\[text](link)'
-    assert md('1. x', escape_misc=True) == r'1\. x'
-    # assert md('1<span>.</span> x', escape_misc=True) == r'1\. x'
-    assert md('<span>1.</span> x', escape_misc=True) == r'1\. x'
-    assert md(' 1. x', escape_misc=True) == r' 1\. x'
-    assert md('123456789. x', escape_misc=True) == r'123456789\. x'
-    assert md('1234567890. x', escape_misc=True) == r'1234567890. x'
-    assert md('A1. x', escape_misc=True) == r'A1. x'
-    assert md('1.2', escape_misc=True) == r'1.2'
-    assert md('not a number. x', escape_misc=True) == r'not a number. x'
-    assert md('1) x', escape_misc=True) == r'1\) x'
-    # assert md('1<span>)</span> x', escape_misc=True) == r'1\) x'
-    assert md('<span>1)</span> x', escape_misc=True) == r'1\) x'
-    assert md(' 1) x', escape_misc=True) == r' 1\) x'
-    assert md('123456789) x', escape_misc=True) == r'123456789\) x'
-    assert md('1234567890) x', escape_misc=True) == r'1234567890) x'
-    assert md('(1) x', escape_misc=True) == r'(1) x'
-    assert md('A1) x', escape_misc=True) == r'A1) x'
-    assert md('1)x', escape_misc=True) == r'1)x'
-    assert md('not a number) x', escape_misc=True) == r'not a number) x'
-    assert md('|not table|', escape_misc=True) == r'\|not table\|'
-    assert md(r'\ &lt;foo> &amp;amp; | ` `', escape_misc=False) == r'\ <foo> &amp; | ` `'
+def text_misc():
+    assert md('\\*') == r'\\\*'
+    assert md('<foo>') == r'\<foo\>'
+    assert md('# foo') == r'\# foo'
+    assert md('> foo') == r'\> foo'
+    assert md('~~foo~~') == r'\~\~foo\~\~'
+    assert md('foo\n===\n') == 'foo\n\\=\\=\\=\n'
+    assert md('---\n') == '\\-\\-\\-\n'
+    assert md('+ x\n+ y\n') == '\\+ x\n\\+ y\n'
+    assert md('`x`') == r'\`x\`'
+    assert md('[text](link)') == r'\[text](link)'
+    assert md('1. x') == r'1\. x'
+    assert md('not a number. x') == r'not a number. x'
+    assert md('1) x') == r'1\) x'
+    assert md('not a number) x') == r'not a number) x'
+    assert md('|not table|') == r'\|not table\|'
+    assert md(r'\ <foo> &amp;amp; | ` `', escape_misc=False) == r'\ <foo> &amp; | ` `'
--- a/tests/test_lists.py
+++ b/tests/test_lists.py
@@ -41,21 +41,19 @@ nested_ols = """


 def test_ol():
-    assert md('<ol><li>a</li><li>b</li></ol>') == '\n\n1. a\n2. b\n'
-    assert md('<ol start="3"><li>a</li><li>b</li></ol>') == '\n\n3. a\n4. b\n'
-    assert md('foo<ol start="3"><li>a</li><li>b</li></ol>bar') == 'foo\n\n3. a\n4. b\n\nbar'
-    assert md('<ol start="-1"><li>a</li><li>b</li></ol>') == '\n\n1. a\n2. b\n'
-    assert md('<ol start="foo"><li>a</li><li>b</li></ol>') == '\n\n1. a\n2. b\n'
-    assert md('<ol start="1.5"><li>a</li><li>b</li></ol>') == '\n\n1. a\n2. b\n'
-    assert md('<ol start="1234"><li><p>first para</p><p>second para</p></li><li><p>third para</p><p>fourth para</p></li></ol>') == '\n\n1234. first para\n      \n      second para\n1235. third para\n      \n      fourth para\n'
+    assert md('<ol><li>a</li><li>b</li></ol>') == '1. a\n2. b\n'
+    assert md('<ol start="3"><li>a</li><li>b</li></ol>') == '3. a\n4. b\n'
+    assert md('<ol start="-1"><li>a</li><li>b</li></ol>') == '1. a\n2. b\n'
+    assert md('<ol start="foo"><li>a</li><li>b</li></ol>') == '1. a\n2. b\n'
+    assert md('<ol start="1.5"><li>a</li><li>b</li></ol>') == '1. a\n2. b\n'


 def test_nested_ols():
-    assert md(nested_ols) == '\n\n1. 1\n   1. a\n      1. I\n      2. II\n      3. III\n   2. b\n   3. c\n2. 2\n3. 3\n'
+    assert md(nested_ols) == '\n1. 1\n\t1. a\n\t\t1. I\n\t\t2. II\n\t\t3. III\n\t2. b\n\t3. c\n2. 2\n3. 3\n'


 def test_ul():
-    assert md('<ul><li>a</li><li>b</li></ul>') == '\n\n* a\n* b\n'
+    assert md('<ul><li>a</li><li>b</li></ul>') == '* a\n* b\n'
    assert md("""<ul>
     <li>
             a
@@ -63,13 +61,11 @@ def test_ul():
     <li> b </li>
     <li>   c
     </li>
- </ul>""") == '\n\n* a\n* b\n* c\n'
-    assert md('<ul><li><p>first para</p><p>second para</p></li><li><p>third para</p><p>fourth para</p></li></ul>') == '\n\n* first para\n  \n  second para\n* third para\n  \n  fourth para\n'
+ </ul>""") == '* a\n* b\n* c\n'


 def test_inline_ul():
-    assert md('<p>foo</p><ul><li>a</li><li>b</li></ul><p>bar</p>') == '\n\nfoo\n\n* a\n* b\n\nbar\n\n'
-    assert md('foo<ul><li>bar</li></ul>baz') == 'foo\n\n* bar\n\nbaz'
+    assert md('<p>foo</p><ul><li>a</li><li>b</li></ul><p>bar</p>') == 'foo\n\n* a\n* b\n\nbar\n\n'


 def test_nested_uls():
@@ -77,12 +73,12 @@ def test_nested_uls():
    Nested ULs should alternate bullet characters.

    """
-    assert md(nested_uls) == '\n\n* 1\n  + a\n    - I\n    - II\n    - III\n  + b\n  + c\n* 2\n* 3\n'
+    assert md(nested_uls) == '\n* 1\n\t+ a\n\t\t- I\n\t\t- II\n\t\t- III\n\t+ b\n\t+ c\n* 2\n* 3\n'


 def test_bullets():
-    assert md(nested_uls, bullets='-') == '\n\n- 1\n  - a\n    - I\n    - II\n    - III\n  - b\n  - c\n- 2\n- 3\n'
+    assert md(nested_uls, bullets='-') == '\n- 1\n\t- a\n\t\t- I\n\t\t- II\n\t\t- III\n\t- b\n\t- c\n- 2\n- 3\n'


 def test_li_text():
-    assert md('<ul><li>foo <a href="#">bar</a></li><li>foo bar  </li><li>foo <b>bar</b>   <i>space</i>.</ul>') == '\n\n* foo [bar](#)\n* foo bar\n* foo **bar** *space*.\n'
+    assert md('<ul><li>foo <a href="#">bar</a></li><li>foo bar  </li><li>foo <b>bar</b>   <i>space</i>.</ul>') == '* foo [bar](#)\n* foo bar\n* foo **bar** *space*.\n'
--- a/tests/test_tables.py
+++ b/tests/test_tables.py
@@ -242,7 +242,7 @@ def test_table():
    assert md(table) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
    assert md(table_with_html_content) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| **Jill** | *Smith* | [50](#) |\n| Eve | Jackson | 94 |\n\n'
    assert md(table_with_paragraphs) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
-    assert md(table_with_linebreaks) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith Jackson | 50 |\n| Eve | Jackson Smith | 94 |\n\n'
+    assert md(table_with_linebreaks) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith  Jackson | 50 |\n| Eve | Jackson  Smith | 94 |\n\n'
    assert md(table_with_header_column) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
    assert md(table_head_body) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
    assert md(table_head_body_missing_head) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
Author	SHA1	Message	Date
AlexVonB	4c23c0655f	use static version instead of dynamic git tag info	2024-07-14 22:34:30 +02:00
AlexVonB	e2ace9d633	test build in develop and pull requests	2024-07-14 22:10:01 +02:00
AlexVonB	a5615f7d80	Merge branch 'pyproject.toml' of https://github.com/KOLANICH-libs/markdownify.py into KOLANICH-libs-pyproject.toml	2024-07-14 21:53:09 +02:00
KOLANICH	67100595ae	Migrated the metadata into `PEP 621`-compliant `pyproject.toml`, got rid of `setup.cfg`.	2022-11-10 15:29:25 +03:00
KOLANICH	deba8b5e54	Started populating version automatically from git tags using `setuptools_scm`.	2022-11-10 15:27:26 +03:00
KOLANICH	ca88e4e49d	Move the metadata from `setup.py` into `setup.cfg`. Added `pyproject.toml`. Removed `setup.py` - it is no longer needed. Got rid of tests erroneously finding their way into the wheel.	2022-11-10 15:25:39 +03:00