Merge branch 'develop'

bump to v0.9.1
Revert "add figure/figcaption"
2021-07-11 13:14:29 +02:00 · 2021-07-11 13:13:31 +02:00 · 2021-07-11 13:12:16 +02:00 · 2021-07-11 13:10:59 +02:00 · 2021-06-30 13:02:42 +02:00 · 2021-06-27 15:53:23 +02:00
5 changed files with 55 additions and 11 deletions
--- a/README.rst
+++ b/README.rst
@@ -100,6 +100,29 @@ Options may be specified as kwargs to the ``markdownify`` function, or as a
 nested ``Options`` class in ``MarkdownConverter`` subclasses.


+Creating Custom Converters
+==========================
+
+If you have a special use case that calls for a special conversion, you can
+always inherit from ``MarkdownConverter`` and override the method you want to
+change:
+
+.. code:: python
+
+    from markdownify import MarkdownConverter
+
+    class ImageBlockConverter(MarkdownConverter):
+        """
+        Create a custom MarkdownConverter that adds two newlines after an image
+        """
+        def convert_img(self, el, text, convert_as_inline):
+            return super().convert_img(el, text, convert_as_inline) + '\n\n'
+
+    # Create shorthand method for conversion
+    def md(html, **options):
+        return ImageBlockConverter(**options).convert(html)
+
+
 Development
 ===========

--- a/markdownify/init.py
+++ b/markdownify/init.py
@@ -142,22 +142,26 @@ class MarkdownConverter(object):
        return text

    def process_text(self, el):
-        text = six.text_type(el)
+        text = six.text_type(el) or ''

        # dont remove any whitespace when handling pre or code in pre
-        if (el.parent.name == 'pre'
-                or (el.parent.name == 'code' and el.parent.parent.name == 'pre')):
-            return escape(text or '')
+        if not (el.parent.name == 'pre'
+                or (el.parent.name == 'code'
+                    and el.parent.parent.name == 'pre')):
+            text = whitespace_re.sub(' ', text)

-        cleaned_text = escape(whitespace_re.sub(' ', text or ''))
+        if el.parent.name != 'code':
+            text = escape(text)

        # remove trailing whitespaces if any of the following condition is true:
        # - current text node is the last node in li
        # - current text node is followed by an embedded list
-        if el.parent.name == 'li' and (not el.next_sibling or el.next_sibling.name in ['ul', 'ol']):
-            return cleaned_text.rstrip()
+        if (el.parent.name == 'li'
+                and (not el.next_sibling
+                     or el.next_sibling.name in ['ul', 'ol'])):
+            text = text.rstrip()

-        return cleaned_text
+        return text

    def __getattr__(self, attr):
        # Handle headings
--- a/setup.py
+++ b/setup.py
@@ -10,7 +10,7 @@ read = lambda filepath: codecs.open(filepath, 'r', 'utf-8').read()
 pkgmeta = {
    '__title__': 'markdownify',
    '__author__': 'Matthew Tretter',
-    '__version__': '0.9.0',
+    '__version__': '0.9.1',
 }


--- a/tests/test_conversions.py
+++ b/tests/test_conversions.py
@@ -70,6 +70,7 @@ def test_br():

 def test_code():
    inline_tests('code', '`')
+    assert md('<code>this_should_not_escape</code>') == '`this_should_not_escape`'


 def test_del():
@@ -131,8 +132,6 @@ def test_hn_nested_simple_tag():


 def test_hn_nested_img():
-    assert md('<img src="/path/to/img.jpg" alt="Alt text" title="Optional title" />') == '![Alt text](/path/to/img.jpg "Optional title")'
-    assert md('<img src="/path/to/img.jpg" alt="Alt text" />') == '![Alt text](/path/to/img.jpg)'
    image_attributes_to_markdown = [
        ("", ""),
        ("alt='Alt Text'", "Alt Text"),
--- a/tests/test_custom_converter.py
+++ b/tests/test_custom_converter.py
@@ -0,0 +1,18 @@
+from markdownify import MarkdownConverter
+
+
+class ImageBlockConverter(MarkdownConverter):
+    """
+    Create a custom MarkdownConverter that adds two newlines after an image
+    """
+    def convert_img(self, el, text, convert_as_inline):
+        return super().convert_img(el, text, convert_as_inline) + '\n\n'
+
+
+def test_img():
+    # Create shorthand method for conversion
+    def md(html, **options):
+        return ImageBlockConverter(**options).convert(html)
+
+    assert md('<img src="/path/to/img.jpg" alt="Alt text" title="Optional title" />') == '![Alt text](/path/to/img.jpg "Optional title")\n\n'
+    assert md('<img src="/path/to/img.jpg" alt="Alt text" />') == '![Alt text](/path/to/img.jpg)\n\n'
Author	SHA1	Message	Date
AlexVonB	cb73590623	Merge branch 'develop'	2021-07-11 13:14:29 +02:00
AlexVonB	22180a166d	bump to v0.9.1	2021-07-11 13:13:31 +02:00
AlexVonB	16d8a0e1f7	Revert "add figure/figcaption" This reverts commit `828e116530`.	2021-07-11 13:12:16 +02:00
AlexVonB	4aa6cf2a24	rewrote text processing to not escape _ in code fixes #47	2021-07-11 13:10:59 +02:00
AlexVonB	828e116530	add figure/figcaption for #46	2021-06-30 13:02:42 +02:00
AlexVonB	62e9f0de02	add examples for custom converters closes #46	2021-06-27 15:53:23 +02:00