Merge branch 'develop'

2021-05-30 19:10:49 +02:00 · 2021-05-30 11:20:32 +02:00 · 2021-05-21 14:18:14 +02:00 · 2021-05-18 10:42:27 +02:00 · 2021-05-16 18:41:24 +02:00 · 2021-05-02 13:49:30 +02:00
8 changed files with 17 additions and 97 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -8,4 +8,3 @@
 /MANIFEST
 /venv
 build/
-.vscode/settings.json
--- a/README.rst
+++ b/README.rst
@@ -100,29 +100,6 @@ Options may be specified as kwargs to the ``markdownify`` function, or as a
 nested ``Options`` class in ``MarkdownConverter`` subclasses.


-Creating Custom Converters
-==========================
-
-If you have a special usecase that calls for a special conversion, you can
-always inherit from ``MarkdownConverter`` and override the method you want to
-change:
-
-.. code:: python
-
-    from markdownify import MarkdownConverter
-
-    class ImageBlockConverter(MarkdownConverter):
-        """
-        Create a custom MarkdownConverter that adds two newlines after an image
-        """
-        def convert_img(self, el, text, convert_as_inline):
-            return super().convert_img(el, text, convert_as_inline) + '\n\n'
-
-    # Create shorthand method for conversion
-    def md(html, **options):
-        return ImageBlockConverter(**options).convert(html)
-
-
 Development
 ===========

--- a/markdownify/__init__.py
+++ b/markdownify/__init__.py
@@ -76,7 +76,6 @@ class MarkdownConverter(object):
        strong_em_symbol = ASTERISK
        sub_symbol = ''
        sup_symbol = ''
-        code_language = ''

    class Options(DefaultOptions):
        pass
@@ -97,14 +96,11 @@ class MarkdownConverter(object):

    def process_tag(self, node, convert_as_inline, children_only=False):
        text = ''
-
-        # markdown headings or cells can't include
-        # block elements (elements w/newlines)
+        # markdown headings can't include block elements (elements w/newlines)
        isHeading = html_heading_re.match(node.name) is not None
-        isCell = node.name in ['td', 'th']
        convert_children_as_inline = convert_as_inline

-        if not children_only and (isHeading or isCell):
+        if not children_only and isHeading:
            convert_children_as_inline = True

        # Remove whitespace-only textnodes in purely nested nodes
@@ -146,26 +142,22 @@ class MarkdownConverter(object):
        return text

    def process_text(self, el):
-        text = six.text_type(el) or ''
+        text = six.text_type(el)

        # dont remove any whitespace when handling pre or code in pre
-        if not (el.parent.name == 'pre'
-                or (el.parent.name == 'code'
-                    and el.parent.parent.name == 'pre')):
-            text = whitespace_re.sub(' ', text)
+        if (el.parent.name == 'pre'
+                or (el.parent.name == 'code' and el.parent.parent.name == 'pre')):
+            return escape(text or '')

-        if el.parent.name != 'code':
-            text = escape(text)
+        cleaned_text = escape(whitespace_re.sub(' ', text or ''))

        # remove trailing whitespaces if any of the following condition is true:
        # - current text node is the last node in li
        # - current text node is followed by an embedded list
-        if (el.parent.name == 'li'
-                and (not el.next_sibling
-                     or el.next_sibling.name in ['ul', 'ol'])):
-            text = text.rstrip()
+        if el.parent.name == 'li' and (not el.next_sibling or el.next_sibling.name in ['ul', 'ol']):
+            return cleaned_text.rstrip()

-        return text
+        return cleaned_text

    def __getattr__(self, attr):
        # Handle headings
@@ -204,6 +196,8 @@ class MarkdownConverter(object):
        prefix, suffix, text = chomp(text)
        if not text:
            return ''
+        if convert_as_inline:
+            return text
        href = el.get('href')
        title = el.get('title')
        # For the replacement see #29: text nodes underscores are escaped
@@ -315,7 +309,7 @@ class MarkdownConverter(object):
                el = el.parent
            bullets = self.options['bullets']
            bullet = bullets[depth % len(bullets)]
-        return '%s %s\n' % (bullet, (text or '').strip())
+        return '%s %s\n' % (bullet, text or '')

    def convert_p(self, el, text, convert_as_inline):
        if convert_as_inline:
@@ -325,7 +319,7 @@ class MarkdownConverter(object):
    def convert_pre(self, el, text, convert_as_inline):
        if not text:
            return ''
-        return '\n```%s\n%s\n```\n' % (self.options['code_language'], text)
+        return '\n```\n%s\n```\n' % text

    convert_s = convert_del

--- a/setup.py
+++ b/setup.py
@@ -10,7 +10,7 @@ read = lambda filepath: codecs.open(filepath, 'r', 'utf-8').read()
 pkgmeta = {
    '__title__': 'markdownify',
    '__author__': 'Matthew Tretter',
-    '__version__': '0.9.4',
+    '__version__': '0.9.0',
 }


--- a/tests/test_conversions.py
+++ b/tests/test_conversions.py
@@ -70,7 +70,6 @@ def test_br():

 def test_code():
    inline_tests('code', '`')
-    assert md('<code>this_should_not_escape</code>') == '`this_should_not_escape`'


 def test_del():
@@ -132,6 +131,8 @@ def test_hn_nested_simple_tag():


 def test_hn_nested_img():
+    assert md('<img src="/path/to/img.jpg" alt="Alt text" title="Optional title" />') == '![Alt text](/path/to/img.jpg "Optional title")'
+    assert md('<img src="/path/to/img.jpg" alt="Alt text" />') == '![Alt text](/path/to/img.jpg)'
    image_attributes_to_markdown = [
        ("", ""),
        ("alt='Alt Text'", "Alt Text"),
@@ -210,8 +211,3 @@ def test_sub():
 def test_sup():
    assert md('<sup>foo</sup>') == 'foo'
    assert md('<sup>foo</sup>', sup_symbol='^') == '^foo^'
-
-
-def test_lang():
-    assert md('<pre>test\n    foo\nbar</pre>', code_language='python') == '\n```python\ntest\n    foo\nbar\n```\n'
-    assert md('<pre><code>test\n    foo\nbar</code></pre>', code_language='javascript') == '\n```javascript\ntest\n    foo\nbar\n```\n'
--- a/tests/test_custom_converter.py
+++ b/tests/test_custom_converter.py
@@ -1,18 +0,0 @@
-from markdownify import MarkdownConverter
-
-
-class ImageBlockConverter(MarkdownConverter):
-    """
-    Create a custom MarkdownConverter that adds two newlines after an image
-    """
-    def convert_img(self, el, text, convert_as_inline):
-        return super().convert_img(el, text, convert_as_inline) + '\n\n'
-
-
-def test_img():
-    # Create shorthand method for conversion
-    def md(html, **options):
-        return ImageBlockConverter(**options).convert(html)
-
-    assert md('<img src="/path/to/img.jpg" alt="Alt text" title="Optional title" />') == '![Alt text](/path/to/img.jpg "Optional title")\n\n'
-    assert md('<img src="/path/to/img.jpg" alt="Alt text" />') == '![Alt text](/path/to/img.jpg)\n\n'
--- a/tests/test_lists.py
+++ b/tests/test_lists.py
@@ -51,14 +51,6 @@ def test_nested_ols():

 def test_ul():
    assert md('<ul><li>a</li><li>b</li></ul>') == '* a\n* b\n'
-    assert md("""<ul>
-     <li>
-             a
-     </li>
-     <li> b </li>
-     <li>   c
-     </li>
- </ul>""") == '* a\n* b\n* c\n'


 def test_inline_ul():
--- a/tests/test_tables.py
+++ b/tests/test_tables.py
@@ -39,25 +39,6 @@ table_with_html_content = """<table>
 </table>"""


-table_with_paragraphs = """<table>
-    <tr>
-        <th>Firstname</th>
-        <th><p>Lastname</p></th>
-        <th>Age</th>
-    </tr>
-    <tr>
-        <td><p>Jill</p></td>
-        <td><p>Smith</p></td>
-        <td><p>50</p></td>
-    </tr>
-    <tr>
-        <td>Eve</td>
-        <td>Jackson</td>
-        <td>94</td>
-    </tr>
-</table>"""
-
-
 table_with_header_column = """<table>
    <tr>
        <th>Firstname</th>
@@ -143,7 +124,6 @@ table_missing_head = """<table>
 def test_table():
    assert md(table) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
    assert md(table_with_html_content) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| **Jill** | *Smith* | [50](#) |\n| Eve | Jackson | 94 |\n\n'
-    assert md(table_with_paragraphs) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
    assert md(table_with_header_column) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
    assert md(table_head_body) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
    assert md(table_missing_text) == '\n\n|  | Lastname | Age |\n| --- | --- | --- |\n| Jill |  | 50 |\n| Eve | Jackson | 94 |\n\n'
Author	SHA1	Message	Date
AlexVonB	59417ab115	Merge branch 'develop'	2021-05-30 19:10:49 +02:00
AlexVonB	917b01e548	Merge branch 'develop'	2021-05-30 11:20:32 +02:00
AlexVonB	652714859d	Merge branch 'develop'	2021-05-21 14:18:14 +02:00
AlexVonB	ea5b22824b	Merge branch 'develop'	2021-05-18 10:42:27 +02:00
AlexVonB	ec5858e42f	Merge branch 'develop'	2021-05-16 18:41:24 +02:00
AlexVonB	02bb914ef3	Merge branch 'develop'	2021-05-02 13:49:30 +02:00
AlexVonB	21c0d034d0	Merge branch 'develop'	2021-05-02 10:51:00 +02:00
AlexVonB	e3ddc789a2	Merge branch 'develop'	2021-04-22 12:43:27 +02:00
AlexVonB	2d0cd97323	Merge branch 'develop'	2021-04-22 12:13:03 +02:00
AlexVonB	ec185e2e9c	Merge branch 'develop'	2021-02-21 23:09:55 +01:00
AlexVonB	079d1721aa	Merge branch 'develop'	2021-02-21 20:58:34 +01:00
AlexVonB	bf24df3e2e	bump to v0.6.3	2021-01-12 22:43:18 +01:00
AlexVonB	15329588b1	Merge branch 'develop'	2021-01-12 22:42:58 +01:00
AlexVonB	34ad8485fa	bump to v0.6.2	2021-01-12 22:40:03 +01:00
AlexVonB	f0ce934bf8	Merge branch 'develop'	2021-01-12 22:39:47 +01:00
AlexVonB	99cd237f27	Merge branch 'develop'	2021-01-04 10:22:02 +01:00
AlexVonB	2bde8d3e8e	Merge branch 'develop'	2021-01-02 16:49:28 +01:00
AlexVonB	8c9b029756	Merge branch 'develop'	2020-09-01 18:10:07 +02:00
AlexVonB	ae50065872	Merge branch 'develop'	2020-08-18 18:53:10 +02:00