Merge branch 'develop'

2020-09-01 18:10:07 +02:00 · 2020-08-18 18:53:10 +02:00
3 changed files with 21 additions and 95 deletions
--- a/markdownify/__init__.py
+++ b/markdownify/__init__.py
@@ -6,7 +6,6 @@ import six
 convert_heading_re = re.compile(r'convert_h(\d+)')
 line_beginning_re = re.compile(r'^', re.MULTILINE)
 whitespace_re = re.compile(r'[\r\n\s\t ]+')
-html_heading_re = re.compile(r'h[1-6]')


 # Heading styles
@@ -62,28 +61,22 @@ class MarkdownConverter(object):

    def convert(self, html):
        soup = BeautifulSoup(html, 'html.parser')
-        return self.process_tag(soup, convert_as_inline=False, children_only=True)
+        return self.process_tag(soup, children_only=True)

-    def process_tag(self, node, convert_as_inline, children_only=False):
+    def process_tag(self, node, children_only=False):
        text = ''
-        # markdown headings can't include block elements (elements w/newlines)
-        isHeading = html_heading_re.match(node.name) is not None
-        convert_children_as_inline = convert_as_inline
-
-        if not children_only and isHeading:
-            convert_children_as_inline = True

        # Convert the children first
        for el in node.children:
            if isinstance(el, NavigableString):
                text += self.process_text(six.text_type(el))
            else:
-                text += self.process_tag(el, convert_children_as_inline)
+                text += self.process_tag(el)

        if not children_only:
            convert_fn = getattr(self, 'convert_%s' % node.name, None)
            if convert_fn and self.should_convert_tag(node.name):
-                text = convert_fn(node, text, convert_as_inline)
+                text = convert_fn(node, text)

        return text

@@ -96,8 +89,8 @@ class MarkdownConverter(object):
        if m:
            n = int(m.group(1))

-            def convert_tag(el, text, convert_as_inline):
-                return self.convert_hn(n, el, text, convert_as_inline)
+            def convert_tag(el, text):
+                return self.convert_hn(n, el, text)

            convert_tag.__name__ = 'convert_h%s' % n
            setattr(self, convert_tag.__name__, convert_tag)
@@ -123,12 +116,10 @@ class MarkdownConverter(object):
        text = (text or '').rstrip()
        return '%s\n%s\n\n' % (text, pad_char * len(text)) if text else ''

-    def convert_a(self, el, text, convert_as_inline):
+    def convert_a(self, el, text):
        prefix, suffix, text = chomp(text)
        if not text:
            return ''
-        if convert_as_inline:
-            return text
        href = el.get('href')
        title = el.get('title')
        if self.options['autolinks'] and text == href and not title:
@@ -137,32 +128,22 @@ class MarkdownConverter(object):
        title_part = ' "%s"' % title.replace('"', r'\"') if title else ''
        return '%s[%s](%s%s)%s' % (prefix, text, href, title_part, suffix) if href else text

-    def convert_b(self, el, text, convert_as_inline):
-        return self.convert_strong(el, text, convert_as_inline)
-
-    def convert_blockquote(self, el, text, convert_as_inline):
-
-        if convert_as_inline:
-            return text
+    def convert_b(self, el, text):
+        return self.convert_strong(el, text)

+    def convert_blockquote(self, el, text):
        return '\n' + line_beginning_re.sub('> ', text) if text else ''

-    def convert_br(self, el, text, convert_as_inline):
-        if convert_as_inline:
-            return ""
-
+    def convert_br(self, el, text):
        return '  \n'

-    def convert_em(self, el, text, convert_as_inline):
+    def convert_em(self, el, text):
        prefix, suffix, text = chomp(text)
        if not text:
            return ''
        return '%s*%s*%s' % (prefix, text, suffix)

-    def convert_hn(self, n, el, text, convert_as_inline):
-        if convert_as_inline:
-            return text
-
+    def convert_hn(self, n, el, text):
        style = self.options['heading_style']
        text = text.rstrip()
        if style == UNDERLINED and n <= 2:
@@ -173,14 +154,10 @@ class MarkdownConverter(object):
            return '%s %s %s\n\n' % (hashes, text, hashes)
        return '%s %s\n\n' % (hashes, text)

-    def convert_i(self, el, text, convert_as_inline):
-        return self.convert_em(el, text, convert_as_inline)
-
-    def convert_list(self, el, text, convert_as_inline):
-
-        # Converting a list to inline is undefined.
-        # Ignoring convert_to_inline for list.
+    def convert_i(self, el, text):
+        return self.convert_em(el, text)

+    def convert_list(self, el, text):
        nested = False
        while el:
            if el.name == 'li':
@@ -195,7 +172,7 @@ class MarkdownConverter(object):
    convert_ul = convert_list
    convert_ol = convert_list

-    def convert_li(self, el, text, convert_as_inline):
+    def convert_li(self, el, text):
        parent = el.parent
        if parent is not None and parent.name == 'ol':
            if parent.get("start"):
@@ -213,25 +190,20 @@ class MarkdownConverter(object):
            bullet = bullets[depth % len(bullets)]
        return '%s %s\n' % (bullet, text or '')

-    def convert_p(self, el, text, convert_as_inline):
-        if convert_as_inline:
-            return text
+    def convert_p(self, el, text):
        return '%s\n\n' % text if text else ''

-    def convert_strong(self, el, text, convert_as_inline):
+    def convert_strong(self, el, text):
        prefix, suffix, text = chomp(text)
        if not text:
            return ''
        return '%s**%s**%s' % (prefix, text, suffix)

-    def convert_img(self, el, text, convert_as_inline):
+    def convert_img(self, el, text):
        alt = el.attrs.get('alt', None) or ''
        src = el.attrs.get('src', None) or ''
        title = el.attrs.get('title', None) or ''
        title_part = ' "%s"' % title.replace('"', r'\"') if title else ''
-        if convert_as_inline:
-            return alt
-
        return '![%s](%s%s)' % (alt, src, title_part)


--- a/setup.py
+++ b/setup.py
@@ -10,7 +10,7 @@ read = lambda filepath: codecs.open(filepath, 'r', 'utf-8').read()
 pkgmeta = {
    '__title__': 'markdownify',
    '__author__': 'Matthew Tretter',
-    '__version__': '0.6.0',
+    '__version__': '0.5.3',
 }


--- a/tests/test_conversions.py
+++ b/tests/test_conversions.py
@@ -107,52 +107,6 @@ def test_hn():
    assert md('<h6>Hello</h6>') == '###### Hello\n\n'


-def test_hn_nested_tag_heading_style():
-    assert md('<h1>A <p>P</p> C </h1>', heading_style=ATX_CLOSED) == '# A P C #\n\n'
-    assert md('<h1>A <p>P</p> C </h1>', heading_style=ATX) == '# A P C\n\n'
-
-
-def test_hn_nested_simple_tag():
-    tag_to_markdown = [
-        ("strong", "**strong**"),
-        ("b", "**b**"),
-        ("em", "*em*"),
-        ("i", "*i*"),
-        ("p", "p"),
-        ("a", "a"),
-        ("div", "div"),
-        ("blockquote", "blockquote"),
-    ]
-
-    for tag, markdown in tag_to_markdown:
-        assert md('<h3>A <' + tag + '>' + tag + '</' + tag + '> B</h3>') == '### A ' + markdown + ' B\n\n'
-
-    assert md('<h3>A <br>B</h3>', heading_style=ATX) == '### A B\n\n'
-
-    # Nested lists not supported
-    # assert md('<h3>A <ul><li>li1</i><li>l2</li></ul></h3>', heading_style=ATX) == '### A li1 li2 B\n\n'
-
-
-def test_hn_nested_img():
-    assert md('<img src="/path/to/img.jpg" alt="Alt text" title="Optional title" />') == '![Alt text](/path/to/img.jpg "Optional title")'
-    assert md('<img src="/path/to/img.jpg" alt="Alt text" />') == '![Alt text](/path/to/img.jpg)'
-    image_attributes_to_markdown = [
-        ("", ""),
-        ("alt='Alt Text'", "Alt Text"),
-        ("alt='Alt Text' title='Optional title'", "Alt Text"),
-    ]
-    for image_attributes, markdown in image_attributes_to_markdown:
-        assert md('<h3>A <img src="/path/to/img.jpg " ' + image_attributes + '/> B</h3>') == '### A ' + markdown + ' B\n\n'
-
-
-def test_hr():
-    assert md('<hr>hr</hr>') == 'hr'
-
-
-def test_head():
-    assert md('<head>head</head>') == 'head'
-
-
 def test_atx_headings():
    assert md('<h1>Hello</h1>', heading_style=ATX) == '# Hello\n\n'
    assert md('<h2>Hello</h2>', heading_style=ATX) == '## Hello\n\n'
Author	SHA1	Message	Date
AlexVonB	8c9b029756	Merge branch 'develop'	2020-09-01 18:10:07 +02:00
AlexVonB	ae50065872	Merge branch 'develop'	2020-08-18 18:53:10 +02:00