From 7dac92e85e9288595167320b4f32325e8f56ff97 Mon Sep 17 00:00:00 2001
From: AlexVonB <AlexVonB@users.noreply.github.com>
Date: Sun, 16 May 2021 19:02:00 +0200
Subject: [PATCH 1/3] Allow for tables without header row

fixes #42
---
 markdownify/__init__.py   |  8 ++++++-
 tests/test_conversions.py | 45 +++++++++++++++++++++++++--------------
 2 files changed, 36 insertions(+), 17 deletions(-)
diff --git a/markdownify/__init__.py b/markdownify/__init__.py
index 6230fb8..284eba3 100644
--- a/markdownify/__init__.py
+++ b/markdownify/__init__.py
@@ -279,14 +279,20 @@ class MarkdownConverter(object):
     def convert_table(self, el, text, convert_as_inline):
         rows = el.find_all('tr')
         text_data = []
+        rendered_header = False
         for row in rows:
             headers = row.find_all('th')
             columns = row.find_all('td')
-            if len(headers) > 0:
+            if not rendered_header and len(headers) > 0:
                 headers = [head.text.strip() for head in headers]
                 text_data.append('| ' + ' | '.join(headers) + ' |')
                 text_data.append('| ' + ' | '.join(['---'] * len(headers)) + ' |')
+                rendered_header = True
             elif len(columns) > 0:
+                if not rendered_header:
+                    text_data.append('| ' + ' | '.join([''] * len(columns)) + ' |')
+                    text_data.append('| ' + ' | '.join(['---'] * len(columns)) + ' |')
+                    rendered_header = True
                 columns = [colm.text.strip() for colm in columns]
                 text_data.append('| ' + ' | '.join(columns) + ' |')
             else:
diff --git a/tests/test_conversions.py b/tests/test_conversions.py
index 6663204..e6f70c0 100644
--- a/tests/test_conversions.py
+++ b/tests/test_conversions.py
@@ -41,8 +41,7 @@ nested_ols = """
     </ul>"""
 
 
-table = re.sub(r'\s+', '', """
-<table>
+table = """<table>
     <tr>
         <th>Firstname</th>
         <th>Lastname</th>
@@ -58,18 +57,16 @@ table = re.sub(r'\s+', '', """
         <td>Jackson</td>
         <td>94</td>
     </tr>
-</table>
-""")
+</table>"""
 
 
-table_head_body = re.sub(r'\s+', '', """
-<table>
+table_head_body = """<table>
     <thead>
-            <tr>
+        <tr>
             <th>Firstname</th>
             <th>Lastname</th>
             <th>Age</th>
-            </tr>
+        </tr>
     </thead>
     <tbody>
         <tr>
@@ -83,17 +80,15 @@ table_head_body = re.sub(r'\s+', '', """
             <td>94</td>
         </tr>
     </tbody>
-</table>
-""")
+</table>"""
 
-table_missing_text = re.sub(r'\s+', '', """
-<table>
+table_missing_text = """<table>
     <thead>
-            <tr>
+        <tr>
             <th></th>
             <th>Lastname</th>
             <th>Age</th>
-            </tr>
+        </tr>
     </thead>
     <tbody>
         <tr>
@@ -107,8 +102,25 @@ table_missing_text = re.sub(r'\s+', '', """
             <td>94</td>
         </tr>
     </tbody>
-</table>
-""")
+</table>"""
+
+table_missing_head = """<table>
+    <tr>
+        <td>Firstname</td>
+        <td>Lastname</td>
+        <td>Age</td>
+    </tr>
+    <tr>
+        <td>Jill</td>
+        <td>Smith</td>
+        <td>50</td>
+    </tr>
+    <tr>
+        <td>Eve</td>
+        <td>Jackson</td>
+        <td>94</td>
+    </tr>
+</table>"""
 
 
 def test_chomp():
@@ -325,6 +337,7 @@ def test_table():
     assert md(table) == '| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |'
     assert md(table_head_body) == '| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |'
     assert md(table_missing_text) == '|  | Lastname | Age |\n| --- | --- | --- |\n| Jill |  | 50 |\n| Eve | Jackson | 94 |'
+    assert md(table_missing_head) == '|  |  |  |\n| --- | --- | --- |\n| Firstname | Lastname | Age |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |'
 
 
 def test_strong_em_symbol():

From e6da15c173d52aaea2d78708463bf7e796bf4ccd Mon Sep 17 00:00:00 2001
From: AlexVonB <AlexVonB@users.noreply.github.com>
Date: Mon, 17 May 2021 12:36:48 +0200
Subject: [PATCH 2/3] allow tables with headers in first (or any) column

---
 markdownify/__init__.py   | 21 ++++++++++-----------
 tests/test_conversions.py | 20 ++++++++++++++++++++
 2 files changed, 30 insertions(+), 11 deletions(-)

diff --git a/markdownify/__init__.py b/markdownify/__init__.py
index 284eba3..d3a2e6e 100644
--- a/markdownify/__init__.py
+++ b/markdownify/__init__.py
@@ -281,20 +281,19 @@ class MarkdownConverter(object):
         text_data = []
         rendered_header = False
         for row in rows:
-            headers = row.find_all('th')
-            columns = row.find_all('td')
-            if not rendered_header and len(headers) > 0:
-                headers = [head.text.strip() for head in headers]
-                text_data.append('| ' + ' | '.join(headers) + ' |')
-                text_data.append('| ' + ' | '.join(['---'] * len(headers)) + ' |')
+            cells = row.find_all(['td', 'th'])
+            is_headrow = all([cell.name == 'th' for cell in cells])
+            texts = [cell.text.strip() for cell in cells]
+            if not rendered_header and is_headrow:
+                text_data.append('| ' + ' | '.join(texts) + ' |')
+                text_data.append('| ' + ' | '.join(['---'] * len(cells)) + ' |')
                 rendered_header = True
-            elif len(columns) > 0:
+            elif len(cells) > 0:
                 if not rendered_header:
-                    text_data.append('| ' + ' | '.join([''] * len(columns)) + ' |')
-                    text_data.append('| ' + ' | '.join(['---'] * len(columns)) + ' |')
+                    text_data.append('| ' + ' | '.join([''] * len(cells)) + ' |')
+                    text_data.append('| ' + ' | '.join(['---'] * len(cells)) + ' |')
                     rendered_header = True
-                columns = [colm.text.strip() for colm in columns]
-                text_data.append('| ' + ' | '.join(columns) + ' |')
+                text_data.append('| ' + ' | '.join(texts) + ' |')
             else:
                 continue
         return '\n'.join(text_data)
diff --git a/tests/test_conversions.py b/tests/test_conversions.py
index e6f70c0..e2f7c39 100644
--- a/tests/test_conversions.py
+++ b/tests/test_conversions.py
@@ -60,6 +60,25 @@ table = """<table>
 </table>"""
 
 
+table_with_header_column = """<table>
+    <tr>
+        <th>Firstname</th>
+        <th>Lastname</th>
+        <th>Age</th>
+    </tr>
+    <tr>
+        <th>Jill</th>
+        <td>Smith</td>
+        <td>50</td>
+    </tr>
+    <tr>
+        <th>Eve</th>
+        <td>Jackson</td>
+        <td>94</td>
+    </tr>
+</table>"""
+
+
 table_head_body = """<table>
     <thead>
         <tr>
@@ -335,6 +354,7 @@ def test_div():
 
 def test_table():
     assert md(table) == '| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |'
+    assert md(table_with_header_column) == '| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |'
     assert md(table_head_body) == '| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |'
     assert md(table_missing_text) == '|  | Lastname | Age |\n| --- | --- | --- |\n| Jill |  | 50 |\n| Eve | Jackson | 94 |'
     assert md(table_missing_head) == '|  |  |  |\n| --- | --- | --- |\n| Firstname | Lastname | Age |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |'

From ea81407b87af48859719ef7bf454edb7792af6a7 Mon Sep 17 00:00:00 2001
From: AlexVonB <AlexVonB@users.noreply.github.com>
Date: Mon, 17 May 2021 14:00:00 +0200
Subject: [PATCH 3/3] implemented table parsing correctly

Instead of manually walking down the DOM tree
inside a table, we now rely on the main descent
loop and only implement the conversion of rows
and cells. This enables the use of HTML inside a
table cell.
---
 markdownify/__init__.py   | 66 ++++++++++++++++++++++-----------------
 setup.cfg                 |  2 +-
 tests/test_conversions.py | 31 ++++++++++++++----
 3 files changed, 64 insertions(+), 35 deletions(-)

diff --git a/markdownify/__init__.py b/markdownify/__init__.py
index d3a2e6e..6c64c60 100644
--- a/markdownify/__init__.py
+++ b/markdownify/__init__.py
@@ -84,18 +84,26 @@ class MarkdownConverter(object):
         if not children_only and isHeading:
             convert_children_as_inline = True
 
-        # Remove whitespace-only textnodes in lists
-        def is_list_node(el):
-            return el and el.name in ['ol', 'ul', 'li']
+        # Remove whitespace-only textnodes in purely nested nodes
+        def is_nested_node(el):
+            return el and el.name in ['ol', 'ul', 'li',
+                                      'table', 'thead', 'tbody', 'tfoot',
+                                      'tr', 'td', 'th']
 
-        if is_list_node(node):
+        if is_nested_node(node):
             for el in node.children:
-                # Only extract (remove) whitespace-only text node if any of the conditions is true:
+                # Only extract (remove) whitespace-only text node if any of the
+                # conditions is true:
                 # - el is the first element in its parent
                 # - el is the last element in its parent
-                # - el is adjacent to an list node
-                can_extract = not el.previous_sibling or not el.next_sibling or is_list_node(el.previous_sibling) or is_list_node(el.next_sibling)
-                if isinstance(el, NavigableString) and six.text_type(el).strip() == '' and can_extract:
+                # - el is adjacent to a nested node
+                can_extract = (not el.previous_sibling
+                               or not el.next_sibling
+                               or is_nested_node(el.previous_sibling)
+                               or is_nested_node(el.next_sibling))
+                if (isinstance(el, NavigableString)
+                        and six.text_type(el).strip() == ''
+                        and can_extract):
                     el.extract()
 
         # Convert the children first
@@ -277,26 +285,28 @@ class MarkdownConverter(object):
         return '![%s](%s%s)' % (alt, src, title_part)
 
     def convert_table(self, el, text, convert_as_inline):
-        rows = el.find_all('tr')
-        text_data = []
-        rendered_header = False
-        for row in rows:
-            cells = row.find_all(['td', 'th'])
-            is_headrow = all([cell.name == 'th' for cell in cells])
-            texts = [cell.text.strip() for cell in cells]
-            if not rendered_header and is_headrow:
-                text_data.append('| ' + ' | '.join(texts) + ' |')
-                text_data.append('| ' + ' | '.join(['---'] * len(cells)) + ' |')
-                rendered_header = True
-            elif len(cells) > 0:
-                if not rendered_header:
-                    text_data.append('| ' + ' | '.join([''] * len(cells)) + ' |')
-                    text_data.append('| ' + ' | '.join(['---'] * len(cells)) + ' |')
-                    rendered_header = True
-                text_data.append('| ' + ' | '.join(texts) + ' |')
-            else:
-                continue
-        return '\n'.join(text_data)
+        return '\n\n' + text + '\n'
+
+    def convert_tr(self, el, text, convert_as_inline):
+        """Render one table row, adding the header separator where needed."""
+        cells = el.find_all(['td', 'th'])
+        is_headrow = all(cell.name == 'th' for cell in cells)
+        overline = underline = ''
+        if is_headrow and not el.previous_sibling:
+            # first row and is headline: print headline underline
+            underline += '| ' + ' | '.join(['---'] * len(cells)) + ' |' + '\n'
+        elif not el.previous_sibling and el.parent.name == 'table':
+            # first row, not a headline, and a direct child of <table> (not
+            # of thead/tbody/tfoot): print empty headline above this row
+            overline += '| ' + ' | '.join([''] * len(cells)) + ' |' + '\n'
+            overline += '| ' + ' | '.join(['---'] * len(cells)) + ' |' + '\n'
+        return overline + '|' + text + '\n' + underline
+
+    def convert_th(self, el, text, convert_as_inline):
+        return ' ' + text + ' |'
+
+    def convert_td(self, el, text, convert_as_inline):
+        return ' ' + text + ' |'
 
     def convert_hr(self, el, text, convert_as_inline):
         return '\n\n---\n\n'
diff --git a/setup.cfg b/setup.cfg
index e44b810..32e2565 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,2 +1,2 @@
 [flake8]
-ignore = E501
+ignore = E501 W503
diff --git a/tests/test_conversions.py b/tests/test_conversions.py
index e2f7c39..31fe7f2 100644
--- a/tests/test_conversions.py
+++ b/tests/test_conversions.py
@@ -1,5 +1,4 @@
 from markdownify import markdownify as md, ATX, ATX_CLOSED, BACKSLASH, UNDERSCORE
-import re
 
 
 nested_uls = """
@@ -60,6 +59,25 @@ table = """<table>
 </table>"""
 
 
+table_with_html_content = """<table>
+    <tr>
+        <th>Firstname</th>
+        <th>Lastname</th>
+        <th>Age</th>
+    </tr>
+    <tr>
+        <td><b>Jill</b></td>
+        <td><i>Smith</i></td>
+        <td><a href="#">50</a></td>
+    </tr>
+    <tr>
+        <td>Eve</td>
+        <td>Jackson</td>
+        <td>94</td>
+    </tr>
+</table>"""
+
+
 table_with_header_column = """<table>
     <tr>
         <th>Firstname</th>
@@ -353,11 +371,12 @@ def test_div():
 
 
 def test_table():
-    assert md(table) == '| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |'
-    assert md(table_with_header_column) == '| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |'
-    assert md(table_head_body) == '| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |'
-    assert md(table_missing_text) == '|  | Lastname | Age |\n| --- | --- | --- |\n| Jill |  | 50 |\n| Eve | Jackson | 94 |'
-    assert md(table_missing_head) == '|  |  |  |\n| --- | --- | --- |\n| Firstname | Lastname | Age |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |'
+    assert md(table) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
+    assert md(table_with_html_content) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| **Jill** | *Smith* | [50](#) |\n| Eve | Jackson | 94 |\n\n'
+    assert md(table_with_header_column) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
+    assert md(table_head_body) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
+    assert md(table_missing_text) == '\n\n|  | Lastname | Age |\n| --- | --- | --- |\n| Jill |  | 50 |\n| Eve | Jackson | 94 |\n\n'
+    assert md(table_missing_head) == '\n\n|  |  |  |\n| --- | --- | --- |\n| Firstname | Lastname | Age |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
 
 
 def test_strong_em_symbol():