diff --git a/markdownify/__init__.py b/markdownify/__init__.py
index 6230fb8..6c64c60 100644
--- a/markdownify/__init__.py
+++ b/markdownify/__init__.py
@@ -84,18 +84,26 @@ class MarkdownConverter(object):
if not children_only and isHeading:
convert_children_as_inline = True
- # Remove whitespace-only textnodes in lists
- def is_list_node(el):
- return el and el.name in ['ol', 'ul', 'li']
+ # Remove whitespace-only textnodes in purely nested nodes
+ def is_nested_node(el):
+ return el and el.name in ['ol', 'ul', 'li',
+ 'table', 'thead', 'tbody', 'tfoot',
+ 'tr', 'td', 'th']
- if is_list_node(node):
+ if is_nested_node(node):
for el in node.children:
- # Only extract (remove) whitespace-only text node if any of the conditions is true:
+ # Only extract (remove) whitespace-only text node if any of the
+ # conditions is true:
# - el is the first element in its parent
# - el is the last element in its parent
- # - el is adjacent to an list node
- can_extract = not el.previous_sibling or not el.next_sibling or is_list_node(el.previous_sibling) or is_list_node(el.next_sibling)
- if isinstance(el, NavigableString) and six.text_type(el).strip() == '' and can_extract:
+ # - el is adjacent to a nested node
+ can_extract = (not el.previous_sibling
+ or not el.next_sibling
+ or is_nested_node(el.previous_sibling)
+ or is_nested_node(el.next_sibling))
+ if (isinstance(el, NavigableString)
+ and six.text_type(el).strip() == ''
+ and can_extract):
el.extract()
# Convert the children first
@@ -277,21 +285,28 @@ class MarkdownConverter(object):
return '' % (alt, src, title_part)
def convert_table(self, el, text, convert_as_inline):
- rows = el.find_all('tr')
- text_data = []
- for row in rows:
- headers = row.find_all('th')
- columns = row.find_all('td')
- if len(headers) > 0:
- headers = [head.text.strip() for head in headers]
- text_data.append('| ' + ' | '.join(headers) + ' |')
- text_data.append('| ' + ' | '.join(['---'] * len(headers)) + ' |')
- elif len(columns) > 0:
- columns = [colm.text.strip() for colm in columns]
- text_data.append('| ' + ' | '.join(columns) + ' |')
- else:
- continue
- return '\n'.join(text_data)
+ """Return the converted child text preceded by two newlines and followed by one."""
+ return '\n\n' + text + '\n'
+
+    def convert_tr(self, el, text, convert_as_inline):
+        """Render one table row as a Markdown pipe-table line.
+
+        ``text`` is the already-converted content of the row's cells (each
+        cell converter terminates its output with ``' |'``); this method
+        prepends the opening ``'|'`` and appends the line break.
+
+        A Markdown table needs a header row followed by a delimiter row
+        (``| --- |``), so the first row of a table additionally emits:
+
+        - the delimiter row *below* itself when all of its cells are
+          ``<th>`` (the row is the header), or
+        - an empty header row plus the delimiter row *above* itself when
+          the table has no header row at all.
+        """
+        cells = el.find_all(['td', 'th'])
+        is_headrow = all(cell.name == 'th' for cell in cells)
+        is_first_row = not el.previous_sibling
+        separator = '| ' + ' | '.join(['---'] * len(cells)) + ' |' + '\n'
+        # The row opens the table when it sits directly in <table>, or in a
+        # <tbody> that has no preceding sibling (i.e. no <thead> before it).
+        parent = el.parent
+        opens_table = is_first_row and (
+            parent.name == 'table'
+            or (parent.name == 'tbody' and not parent.previous_sibling))
+        overline = ''
+        underline = ''
+        if is_headrow and is_first_row:
+            # first row and is headline: print headline underline
+            underline = separator
+        elif opens_table:
+            # first row of the table but not a headline:
+            # print empty headline above this row
+            overline = '| ' + ' | '.join([''] * len(cells)) + ' |' + '\n'
+            overline += separator
+        return overline + '|' + text + '\n' + underline
+
+    def convert_th(self, el, text, convert_as_inline):
+        """Render a header cell as ``' <content> |'``.
+
+        The opening ``'|'`` of the row is added by ``convert_tr``.  A line
+        break inside a cell would terminate the table row, so surrounding
+        whitespace is stripped and inner newlines are replaced by spaces.
+        """
+        return ' ' + text.strip().replace('\n', ' ') + ' |'
+
+    def convert_td(self, el, text, convert_as_inline):
+        """Render a data cell as ``' <content> |'``.
+
+        The opening ``'|'`` of the row is added by ``convert_tr``.  A line
+        break inside a cell would terminate the table row, so surrounding
+        whitespace is stripped and inner newlines are replaced by spaces.
+        """
+        return ' ' + text.strip().replace('\n', ' ') + ' |'
def convert_hr(self, el, text, convert_as_inline):
return '\n\n---\n\n'
diff --git a/setup.cfg b/setup.cfg
index e44b810..32e2565 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,2 +1,2 @@
[flake8]
-ignore = E501
+ignore = E501 W503
diff --git a/tests/test_conversions.py b/tests/test_conversions.py
index 6663204..31fe7f2 100644
--- a/tests/test_conversions.py
+++ b/tests/test_conversions.py
@@ -1,5 +1,4 @@
from markdownify import markdownify as md, ATX, ATX_CLOSED, BACKSLASH, UNDERSCORE
-import re
nested_uls = """
@@ -41,8 +40,7 @@ nested_ols = """
"""
-table = re.sub(r'\s+', '', """
-
+table = """
| Firstname |
Lastname |
@@ -58,18 +56,54 @@ table = re.sub(r'\s+', '', """
Jackson |
94 |
-
-""")
+
"""
-table_head_body = re.sub(r'\s+', '', """
-
+table_with_html_content = """
+
+ | Firstname |
+ Lastname |
+ Age |
+
+
+ | Jill |
+ Smith |
+ 50 |
+
+
+ | Eve |
+ Jackson |
+ 94 |
+
+
"""
+
+
+table_with_header_column = """
+
+ | Firstname |
+ Lastname |
+ Age |
+
+
+ | Jill |
+ Smith |
+ 50 |
+
+
+ | Eve |
+ Jackson |
+ 94 |
+
+
"""
+
+
+table_head_body = """
-
+
| Firstname |
Lastname |
Age |
-
+
@@ -83,17 +117,15 @@ table_head_body = re.sub(r'\s+', '', """
| 94 |
-
-""")
+
"""
-table_missing_text = re.sub(r'\s+', '', """
-
+table_missing_text = """
-
+
|
Lastname |
Age |
-
+
@@ -107,8 +139,25 @@ table_missing_text = re.sub(r'\s+', '', """
| 94 |
-
-""")
+
"""
+
+table_missing_head = """
+
+ | Firstname |
+ Lastname |
+ Age |
+
+
+ | Jill |
+ Smith |
+ 50 |
+
+
+ | Eve |
+ Jackson |
+ 94 |
+
+
"""
def test_chomp():
@@ -322,9 +371,12 @@ def test_div():
def test_table():
- assert md(table) == '| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |'
- assert md(table_head_body) == '| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |'
- assert md(table_missing_text) == '| | Lastname | Age |\n| --- | --- | --- |\n| Jill | | 50 |\n| Eve | Jackson | 94 |'
+ # Every expected table now starts with two newlines and ends with a blank line after the last row.
+ assert md(table) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
+ # Inline markup inside cells is expected to be converted (bold, emphasis, link).
+ assert md(table_with_html_content) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| **Jill** | *Smith* | [50](#) |\n| Eve | Jackson | 94 |\n\n'
+ assert md(table_with_header_column) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
+ assert md(table_head_body) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
+ assert md(table_missing_text) == '\n\n| | Lastname | Age |\n| --- | --- | --- |\n| Jill | | 50 |\n| Eve | Jackson | 94 |\n\n'
+ # The expected output here begins with an empty header row and a delimiter row above the data rows.
+ assert md(table_missing_head) == '\n\n| | | |\n| --- | --- | --- |\n| Firstname | Lastname | Age |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
def test_strong_em_symbol():