Fix parsing of corrupt HTML

2020-08-31 13:15:10 +02:00
parent 987a2a9cae
commit 1b3136ad04
2 changed files with 5 additions and 7 deletions
--- a/markdownify/__init__.py
+++ b/markdownify/__init__.py
@@ -6,8 +6,6 @@ import six
 convert_heading_re = re.compile(r'convert_h(\d+)')
 line_beginning_re = re.compile(r'^', re.MULTILINE)
 whitespace_re = re.compile(r'[\r\n\s\t ]+')
-FRAGMENT_ID = '__MARKDOWNIFY_WRAPPER__'
-wrapped = '<div id="%s">%%s</div>' % FRAGMENT_ID


 # Heading styles
@@ -62,12 +60,8 @@ class MarkdownConverter(object):
                             ' convert, but not both.')

    def convert(self, html):
-        # We want to take advantage of the html5 parsing, but we don't actually
-        # want a full document. Therefore, we'll mark our fragment with an id,
-        # create the document, and extract the element with the id.
-        html = wrapped % html
        soup = BeautifulSoup(html, 'html.parser')
-        return self.process_tag(soup.find(id=FRAGMENT_ID), children_only=True)
+        return self.process_tag(soup, children_only=True)

    def process_tag(self, node, children_only=False):
        text = ''
--- a/tests/test_conversions.py
+++ b/tests/test_conversions.py
@@ -157,3 +157,7 @@ def test_bullets():
 def test_img():
    assert md('<img src="/path/to/img.jpg" alt="Alt text" title="Optional title" />') == '![Alt text](/path/to/img.jpg "Optional title")'
    assert md('<img src="/path/to/img.jpg" alt="Alt text" />') == '![Alt text](/path/to/img.jpg)'
+
+
+def test_div():
+    assert md('Hello</div> World') == 'Hello World'