From 0fdeb1ff6e5c7e13fcda525be49dfff8e143042b Mon Sep 17 00:00:00 2001 From: AlexVonB Date: Wed, 25 Aug 2021 08:48:30 +0200 Subject: [PATCH] convert tags inside table cells as inline in part resolves #49 --- markdownify/__init__.py | 9 +++++---- tests/test_tables.py | 20 ++++++++++++++++++++ 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/markdownify/__init__.py b/markdownify/__init__.py index 47485f5..3d67a49 100644 --- a/markdownify/__init__.py +++ b/markdownify/__init__.py @@ -96,11 +96,14 @@ class MarkdownConverter(object): def process_tag(self, node, convert_as_inline, children_only=False): text = '' - # markdown headings can't include block elements (elements w/newlines) + + # markdown headings or cells can't include + # block elements (elements w/newlines) isHeading = html_heading_re.match(node.name) is not None + isCell = node.name in ['td', 'th'] convert_children_as_inline = convert_as_inline - if not children_only and isHeading: + if not children_only and (isHeading or isCell): convert_children_as_inline = True # Remove whitespace-only textnodes in purely nested nodes @@ -200,8 +203,6 @@ class MarkdownConverter(object): prefix, suffix, text = chomp(text) if not text: return '' - if convert_as_inline: - return text href = el.get('href') title = el.get('title') # For the replacement see #29: text nodes underscores are escaped diff --git a/tests/test_tables.py b/tests/test_tables.py index e481e92..27d4403 100644 --- a/tests/test_tables.py +++ b/tests/test_tables.py @@ -39,6 +39,25 @@ table_with_html_content = """
""" +table_with_paragraphs = """ + + + + + + + + + + + + + + + +
Firstname

Lastname

Age

Jill

Smith

50

EveJackson94
""" + + table_with_header_column = """ @@ -124,6 +143,7 @@ table_missing_head = """
Firstname
def test_table(): assert md(table) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' assert md(table_with_html_content) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| **Jill** | *Smith* | [50](#) |\n| Eve | Jackson | 94 |\n\n' + assert md(table_with_paragraphs) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' assert md(table_with_header_column) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' assert md(table_head_body) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' assert md(table_missing_text) == '\n\n| | Lastname | Age |\n| --- | --- | --- |\n| Jill | | 50 |\n| Eve | Jackson | 94 |\n\n'