From 146104b41fb68a6b6710cf78017d34e5aae3c5a5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi?= <infosec@twelve.pm>
Date: Wed, 20 Nov 2019 10:37:39 +0100
Subject: [PATCH 1/6] Remove newline-only textnodes outside <pre>

---
 markdownify/__init__.py | 5 +++++
 1 file changed, 5 insertions(+)
diff --git a/markdownify/__init__.py b/markdownify/__init__.py
index 25608bf..f97bcbf 100644
--- a/markdownify/__init__.py
+++ b/markdownify/__init__.py
@@ -59,6 +59,11 @@ class MarkdownConverter(object):
     def process_tag(self, node, children_only=False):
         text = ''
 
+        # Clean newline-only textnodes outside <pre>
+        for el in node.children:
+            if node.name != 'pre' and isinstance(el, NavigableString) and six.text_type(el) == '\n':
+                el.extract()
+
         # Convert the children first
         for el in node.children:
             if isinstance(el, NavigableString):

From 7b788bafd4d4d71dce4733dde8c0d88372f148f9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi?= <infosec@twelve.pm>
Date: Thu, 21 Nov 2019 09:35:34 +0100
Subject: [PATCH 2/6] Add nested OL test (for newlines) and correct list
 nesting

---
 markdownify/__init__.py   |  9 +++++++--
 tests/test_conversions.py | 29 +++++++++++++++++++++++++----
 2 files changed, 32 insertions(+), 6 deletions(-)

diff --git a/markdownify/__init__.py b/markdownify/__init__.py
index f97bcbf..b7c9545 100644
--- a/markdownify/__init__.py
+++ b/markdownify/__init__.py
@@ -151,14 +151,19 @@ class MarkdownConverter(object):
 
     def convert_list(self, el, text):
         nested = False
+        before_paragraph = False
+        print(el.name, repr(el.next_sibling), repr(text))
+        if el.next_sibling and el.next_sibling.name not in ['ul', 'ol']:
+            print(el.name, repr(el.next_sibling))
+            before_paragraph = True
         while el:
             if el.name == 'li':
                 nested = True
                 break
             el = el.parent
         if nested:
-            text = '\n' + self.indent(text, 1)
-        return '\n' + text + '\n'
+            text = '\n' + self.indent(text, 1).rstrip()
+        return text + ('\n' if before_paragraph else '')
 
     convert_ul = convert_list
     convert_ol = convert_list
diff --git a/tests/test_conversions.py b/tests/test_conversions.py
index dfc8d3c..98065bb 100644
--- a/tests/test_conversions.py
+++ b/tests/test_conversions.py
@@ -2,7 +2,7 @@ from markdownify import markdownify as md, ATX, ATX_CLOSED
 import re
 
 
-nested_uls = re.sub('\s+', '', """
+nested_uls = """
     <ul>
         <li>1
             <ul>
@@ -19,7 +19,26 @@ nested_uls = re.sub('\s+', '', """
         </li>
         <li>2</li>
         <li>3</li>
-    </ul>""")
+    </ul>"""
+
+nested_ols = """
+    <ol>
+        <li>1
+            <ol>
+                <li>a
+                    <ol>
+                        <li>I</li>
+                        <li>II</li>
+                        <li>III</li>
+                    </ol>
+                </li>
+                <li>b</li>
+                <li>c</li>
+            </ol>
+        </li>
+        <li>2</li>
+        <li>3</li>
+    </ol>"""
 
 
 def test_a():
@@ -92,6 +111,8 @@ def test_i():
 def test_ol():
     assert md('<ol><li>a</li><li>b</li></ol>') == '1. a\n2. b\n'
 
+def test_nested_ols():
+    assert md(nested_ols) == '1. 1 \n\t1. a \n\t\t1. I\n\t\t2. II\n\t\t3. III\n\t2. b\n\t3. c\n2. 2\n3. 3\n'
 
 def test_p():
     assert md('<p>hello</p>') == 'hello\n\n'
@@ -113,11 +134,11 @@ def test_nested_uls():
     Nested ULs should alternate bullet characters.
 
     """
-    assert md(nested_uls) == '* 1\n\t+ a\n\t\t- I\n\t\t- II\n\t\t- III\n\t\t\n\t+ b\n\t+ c\n\t\n* 2\n* 3\n'
+    assert md(nested_uls) == '* 1 \n\t+ a \n\t\t- I\n\t\t- II\n\t\t- III\n\t+ b\n\t+ c\n* 2\n* 3\n'
 
 
 def test_bullets():
-    assert md(nested_uls, bullets='-') == '- 1\n\t- a\n\t\t- I\n\t\t- II\n\t\t- III\n\t\t\n\t- b\n\t- c\n\t\n- 2\n- 3\n'
+    assert md(nested_uls, bullets='-') == '- 1 \n\t- a \n\t\t- I\n\t\t- II\n\t\t- III\n\t- b\n\t- c\n- 2\n- 3\n'
 
 
 def test_img():

From 6a0e5d8176294758877a557de075778f97f08d38 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi?= <infosec@twelve.pm>
Date: Thu, 21 Nov 2019 09:46:22 +0100
Subject: [PATCH 3/6] Correct inline UL test as paragraphs are followed by two
 newlines

---
 tests/test_conversions.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tests/test_conversions.py b/tests/test_conversions.py
index 98065bb..d27b008 100644
--- a/tests/test_conversions.py
+++ b/tests/test_conversions.py
@@ -126,8 +126,7 @@ def test_ul():
     assert md('<ul><li>a</li><li>b</li></ul>') == '* a\n* b\n'
     
 def test_inline_ul():
-    assert md('<p>foo</p><ul><li>a</li><li>b</li></ul><p>bar</p>') == 'foo \n* a\n* b\n\nbar'
-
+    assert md('<p>foo</p><ul><li>a</li><li>b</li></ul><p>bar</p>') == 'foo\n\n* a\n* b\n\nbar\n\n'
 
 def test_nested_uls():
     """

From d23596706d95600bb7bf15b11f7e0108a2d5afbf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9mi?= <infosec@twelve.pm>
Date: Fri, 22 Nov 2019 11:49:22 +0100
Subject: [PATCH 4/6] Remove debug prints

---
 markdownify/__init__.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/markdownify/__init__.py b/markdownify/__init__.py
index b7c9545..f0fe118 100644
--- a/markdownify/__init__.py
+++ b/markdownify/__init__.py
@@ -152,9 +152,7 @@ class MarkdownConverter(object):
     def convert_list(self, el, text):
         nested = False
         before_paragraph = False
-        print(el.name, repr(el.next_sibling), repr(text))
         if el.next_sibling and el.next_sibling.name not in ['ul', 'ol']:
-            print(el.name, repr(el.next_sibling))
             before_paragraph = True
         while el:
             if el.name == 'li':

From 2c7e4a0100e053c18417932cc9611f27a0888685 Mon Sep 17 00:00:00 2001
From: SimonIT <simonit.orig@gmail.com>
Date: Wed, 26 Aug 2020 19:47:11 +0200
Subject: [PATCH 5/6] Fix tests

---
 tests/test_conversions.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/test_conversions.py b/tests/test_conversions.py
index 3a75907..07aae57 100644
--- a/tests/test_conversions.py
+++ b/tests/test_conversions.py
@@ -140,8 +140,8 @@ def test_i():
 
 
 def test_ol():
-    assert md('<ol><li>a</li><li>b</li></ol>') == '\n1. a\n2. b\n\n'
-    assert md('<ol start="3"><li>a</li><li>b</li></ol>') == '\n3. a\n4. b\n\n'
+    assert md('<ol><li>a</li><li>b</li></ol>') == '1. a\n2. b\n'
+    assert md('<ol start="3"><li>a</li><li>b</li></ol>') == '3. a\n4. b\n'
 
 
 def test_nested_ols():
@@ -157,7 +157,7 @@ def test_strong():
 
 
 def test_ul():
-    assert md('<ul><li>a</li><li>b</li></ul>') == '\n* a\n* b\n\n'
+    assert md('<ul><li>a</li><li>b</li></ul>') == '* a\n* b\n'
 
 
 def test_inline_ul():
@@ -169,11 +169,11 @@ def test_nested_uls():
     Nested ULs should alternate bullet characters.
 
     """
-    assert md(nested_uls) == '* 1 \n\t+ a \n\t\t- I\n\t\t- II\n\t\t- III\n\t+ b\n\t+ c\n* 2\n* 3\n\n'
+    assert md(nested_uls) == '* 1 \n\t+ a \n\t\t- I\n\t\t- II\n\t\t- III\n\t+ b\n\t+ c\n* 2\n* 3\n'
 
 
 def test_bullets():
-    assert md(nested_uls, bullets='-') == '- 1 \n\t- a \n\t\t- I\n\t\t- II\n\t\t- III\n\t- b\n\t- c\n- 2\n- 3\n\n'
+    assert md(nested_uls, bullets='-') == '- 1 \n\t- a \n\t\t- I\n\t\t- II\n\t\t- III\n\t- b\n\t- c\n- 2\n- 3\n'
 
 
 def test_img():

From 73800ced360d907f262b4d548926cfff9c9f42f5 Mon Sep 17 00:00:00 2001
From: AlexVonB <AlexVonB@users.noreply.github.com>
Date: Sun, 2 May 2021 13:44:09 +0200
Subject: [PATCH 6/6] Fix whitespace issues in nested lists

---
 markdownify/__init__.py   | 17 +++++++++++------
 tests/test_conversions.py | 12 ++++++------
 2 files changed, 17 insertions(+), 12 deletions(-)

diff --git a/markdownify/__init__.py b/markdownify/__init__.py
index 1322ac0..da04ebf 100644
--- a/markdownify/__init__.py
+++ b/markdownify/__init__.py
@@ -6,6 +6,7 @@ import six
 convert_heading_re = re.compile(r'convert_h(\d+)')
 line_beginning_re = re.compile(r'^', re.MULTILINE)
 whitespace_re = re.compile(r'[\t ]+')
+all_whitespace_re = re.compile(r'[\s]+')
 html_heading_re = re.compile(r'h[1-6]')
 
 
@@ -83,17 +84,18 @@ class MarkdownConverter(object):
         if not children_only and isHeading:
             convert_children_as_inline = True
 
-        # Clean newline-only textnodes outside <pre>
-        for el in node.children:
-            if node.name != 'pre' and isinstance(el, NavigableString) and six.text_type(el) == '\n':
-                el.extract()
+        # Remove whitespace-only textnodes in lists
+        if node.name in ['ol', 'ul', 'li']:
+            for el in node.children:
+                if isinstance(el, NavigableString) and six.text_type(el).strip() == '':
+                    el.extract()
 
         # Convert the children first
         for el in node.children:
             if isinstance(el, Comment):
                 continue
             elif isinstance(el, NavigableString):
-                text += self.process_text(six.text_type(el))
+                text += self.process_text(el)
             else:
                 text += self.process_tag(el, convert_children_as_inline)
 
@@ -104,7 +106,10 @@ class MarkdownConverter(object):
 
         return text
 
-    def process_text(self, text):
+    def process_text(self, el):
+        text = six.text_type(el)
+        if el.parent.name == 'li':
+            return escape(all_whitespace_re.sub(' ', text or '')).rstrip()
         return escape(whitespace_re.sub(' ', text or ''))
 
     def __getattr__(self, attr):
diff --git a/tests/test_conversions.py b/tests/test_conversions.py
index 68bb81e..caac0fd 100644
--- a/tests/test_conversions.py
+++ b/tests/test_conversions.py
@@ -276,10 +276,6 @@ def test_ol():
     assert md('<ol start="3"><li>a</li><li>b</li></ol>') == '3. a\n4. b\n'
 
 
-def test_nested_ols():
-    assert md(nested_ols) == '1. 1 \n\t1. a \n\t\t1. I\n\t\t2. II\n\t\t3. III\n\t2. b\n\t3. c\n2. 2\n3. 3\n'
-
-
 def test_p():
     assert md('<p>hello</p>') == 'hello\n\n'
 
@@ -292,6 +288,10 @@ def test_ul():
     assert md('<ul><li>a</li><li>b</li></ul>') == '* a\n* b\n'
 
 
+def test_nested_ols():
+    assert md(nested_ols) == '\n1. 1\n\t1. a\n\t\t1. I\n\t\t2. II\n\t\t3. III\n\t2. b\n\t3. c\n2. 2\n3. 3\n'
+
+
 def test_inline_ul():
     assert md('<p>foo</p><ul><li>a</li><li>b</li></ul><p>bar</p>') == 'foo\n\n* a\n* b\n\nbar\n\n'
 
@@ -301,11 +301,11 @@ def test_nested_uls():
     Nested ULs should alternate bullet characters.
 
     """
-    assert md(nested_uls) == '* 1 \n\t+ a \n\t\t- I\n\t\t- II\n\t\t- III\n\t+ b\n\t+ c\n* 2\n* 3\n'
+    assert md(nested_uls) == '\n* 1\n\t+ a\n\t\t- I\n\t\t- II\n\t\t- III\n\t+ b\n\t+ c\n* 2\n* 3\n'
 
 
 def test_bullets():
-    assert md(nested_uls, bullets='-') == '- 1 \n\t- a \n\t\t- I\n\t\t- II\n\t\t- III\n\t- b\n\t- c\n- 2\n- 3\n'
+    assert md(nested_uls, bullets='-') == '\n- 1\n\t- a\n\t\t- I\n\t\t- II\n\t\t- III\n\t- b\n\t- c\n- 2\n- 3\n'
 
 
 def test_img():