From 73f7644c0d998ad9621099c2d93974033f5fbda8 Mon Sep 17 00:00:00 2001 From: Bruno Miguens Date: Mon, 8 Feb 2021 16:56:10 +0000 Subject: [PATCH 1/8] Add basic support for HTML tables --- markdownify/__init__.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/markdownify/__init__.py b/markdownify/__init__.py index 5c008d3..5fdcbf3 100644 --- a/markdownify/__init__.py +++ b/markdownify/__init__.py @@ -237,6 +237,24 @@ class MarkdownConverter(object): return '![%s](%s%s)' % (alt, src, title_part) + def convert_table(self, el, text, convert_as_inline): + rows = el.find_all('tr') + text_data = [] + for row in rows: + headers = row.find_all('th') + columns = row.find_all('td') + if len(headers) > 0: + headers = [head.text.strip() for head in headers] + headers = [head for head in headers if head] + text_data.append(' | '.join(headers)) + text_data.append(' | '.join(['---'] * len(headers))) + elif len(columns) > 0: + columns = [colm.text.strip() for colm in columns] + text_data.append(' | '.join([colm for colm in columns if colm])) + else: + continue + return '\n'.join(text_data) + def markdownify(html, **options): return MarkdownConverter(**options).convert(html) From db96eeb7852f644e4269aa7d3b4372b58c7e4fb4 Mon Sep 17 00:00:00 2001 From: Bruno Miguens Date: Mon, 8 Feb 2021 16:56:41 +0000 Subject: [PATCH 2/8] Add tests for basic and thead/tbody tables --- tests/test_conversions.py | 56 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/tests/test_conversions.py b/tests/test_conversions.py index edaefbc..f274324 100644 --- a/tests/test_conversions.py +++ b/tests/test_conversions.py @@ -22,6 +22,52 @@ nested_uls = re.sub(r'\s+', '', """ """) +table = re.sub(r'\s+', '', """ + + + + + + + + + + + + + + + + +
FirstnameLastnameAge
JillSmith50
EveJackson94
+""") + + +table_head_body = re.sub(r'\s+', '', """ + + + + + + + + + + + + + + + + + + + + +
FirstnameLastnameAge
JillSmith50
EveJackson94
+""") + + def test_chomp(): assert md(' ') == ' ' assert md(' ') == ' ' @@ -31,6 +77,11 @@ def test_chomp(): assert md(' s ') == ' **s** ' assert md(' s ') == ' **s** ' assert md(' s ') == ' **s** ' + assert md('bold with br
italic') == '**bold with br***italic*' + + +def test_chomp_ext(): + assert md('bold with br
italic') == '**bold with br***italic*' def test_a(): @@ -216,3 +267,8 @@ def test_img(): def test_div(): assert md('Hello World') == 'Hello World' + + +def test_table(): + assert md(table) == 'Firstname | Lastname | Age\n--- | --- | ---\nJill | Smith | 50\nEve | Jackson | 94' + assert md(table_head_body) == 'Firstname | Lastname | Age\n--- | --- | ---\nJill | Smith | 50\nEve | Jackson | 94' From 292d64bbf4015e800282149249a5a3c3c1a394ef Mon Sep 17 00:00:00 2001 From: Bruno Miguens Date: Mon, 8 Feb 2021 19:26:27 +0000 Subject: [PATCH 3/8] Remove unnecessary tests --- tests/test_conversions.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tests/test_conversions.py b/tests/test_conversions.py index f274324..8c5e369 100644 --- a/tests/test_conversions.py +++ b/tests/test_conversions.py @@ -77,11 +77,6 @@ def test_chomp(): assert md(' s ') == ' **s** ' assert md(' s ') == ' **s** ' assert md(' s ') == ' **s** ' - assert md('bold with br
italic') == '**bold with br***italic*' - - -def test_chomp_ext(): - assert md('bold with br
italic') == '**bold with br***italic*' def test_a(): From a152c5b7068bab9289e1650f1ca6c280011c8a2a Mon Sep 17 00:00:00 2001 From: Bruno Miguens Date: Mon, 8 Feb 2021 19:32:35 +0000 Subject: [PATCH 4/8] Fix lint --- tests/test_conversions.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/test_conversions.py b/tests/test_conversions.py index 8c5e369..cff19bd 100644 --- a/tests/test_conversions.py +++ b/tests/test_conversions.py @@ -26,17 +26,17 @@ table = re.sub(r'\s+', '', """ - + - + - +
FirstnameLastnameLastname Age
JillSmithSmith 50
EveJacksonJackson 94
@@ -55,12 +55,12 @@ table_head_body = re.sub(r'\s+', '', """ Jill - Smith + Smith 50 Eve - Jackson + Jackson 94 From 8c28ade348d766705513e07ecb700718a1eb7f2c Mon Sep 17 00:00:00 2001 From: Bruno Miguens Date: Mon, 8 Feb 2021 20:50:15 +0000 Subject: [PATCH 5/8] Remove empty header validation to allow empty header --- markdownify/__init__.py | 3 +-- tests/test_conversions.py | 25 +++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/markdownify/__init__.py b/markdownify/__init__.py index 5fdcbf3..fcdc32a 100644 --- a/markdownify/__init__.py +++ b/markdownify/__init__.py @@ -245,12 +245,11 @@ class MarkdownConverter(object): columns = row.find_all('td') if len(headers) > 0: headers = [head.text.strip() for head in headers] - headers = [head for head in headers if head] text_data.append(' | '.join(headers)) text_data.append(' | '.join(['---'] * len(headers))) elif len(columns) > 0: columns = [colm.text.strip() for colm in columns] - text_data.append(' | '.join([colm for colm in columns if colm])) + text_data.append(' | '.join(columns)) else: continue return '\n'.join(text_data) diff --git a/tests/test_conversions.py b/tests/test_conversions.py index cff19bd..2d2e825 100644 --- a/tests/test_conversions.py +++ b/tests/test_conversions.py @@ -67,6 +67,30 @@ table_head_body = re.sub(r'\s+', '', """ """) +table_missing_header = re.sub(r'\s+', '', """ + + + + + + + + + + + + + + + + + + + + +
LastnameAge
JillSmith50
EveJackson94
+""") + def test_chomp(): assert md(' ') == ' ' @@ -267,3 +291,4 @@ def test_div(): def test_table(): assert md(table) == 'Firstname | Lastname | Age\n--- | --- | ---\nJill | Smith | 50\nEve | Jackson | 94' assert md(table_head_body) == 'Firstname | Lastname | Age\n--- | --- | ---\nJill | Smith | 50\nEve | Jackson | 94' + assert md(table_missing_header) == ' | Lastname | Age\n--- | --- | ---\nJill | Smith | 50\nEve | Jackson | 94' From de6f91af0e10e7ecaa8a758d216f24a15b99a44a Mon Sep 17 00:00:00 2001 From: Bruno Miguens Date: Mon, 8 Feb 2021 20:56:18 +0000 Subject: [PATCH 6/8] Revert header validation and leave possibility to empty column --- markdownify/__init__.py | 1 + tests/test_conversions.py | 8 ++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/markdownify/__init__.py b/markdownify/__init__.py index fcdc32a..3bda85e 100644 --- a/markdownify/__init__.py +++ b/markdownify/__init__.py @@ -245,6 +245,7 @@ class MarkdownConverter(object): columns = row.find_all('td') if len(headers) > 0: headers = [head.text.strip() for head in headers] + headers = [head for head in headers if head] text_data.append(' | '.join(headers)) text_data.append(' | '.join(['---'] * len(headers))) elif len(columns) > 0: diff --git a/tests/test_conversions.py b/tests/test_conversions.py index 2d2e825..de3307f 100644 --- a/tests/test_conversions.py +++ b/tests/test_conversions.py @@ -67,11 +67,11 @@ table_head_body = re.sub(r'\s+', '', """ """) -table_missing_header = re.sub(r'\s+', '', """ +table_missing_text = re.sub(r'\s+', '', """ - + @@ -79,7 +79,7 @@ table_missing_header = re.sub(r'\s+', '', """ - + @@ -291,4 +291,4 @@ def test_div(): def test_table(): assert md(table) == 'Firstname | Lastname | Age\n--- | --- | ---\nJill | Smith | 50\nEve | Jackson | 94' assert md(table_head_body) == 'Firstname | Lastname | Age\n--- | --- | ---\nJill | Smith | 50\nEve | Jackson | 94' - assert md(table_missing_header) == ' | Lastname | Age\n--- | --- | ---\nJill | Smith | 50\nEve | Jackson | 94' + assert md(table_missing_text) == 'Firstname | Lastname | Age\n--- | --- | ---\nJill | | 50\nEve | Jackson | 94' From e1dbbfad42f6cb36da9a4d93710f8693ea82c374 Mon Sep 17 00:00:00 2001 From: AlexVonB Date: Thu, 22 Apr 2021 12:36:11 +0200 Subject: [PATCH 7/8] guard table lines with pipes, resolves the empty header problem --- markdownify/__init__.py | 7 +++---- tests/test_conversions.py | 8 ++++---- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/markdownify/__init__.py b/markdownify/__init__.py index 3bda85e..8200ca7 100644 --- a/markdownify/__init__.py +++ b/markdownify/__init__.py @@ -245,12 +245,11 @@ class MarkdownConverter(object): columns = row.find_all('td') if len(headers) > 0: headers = [head.text.strip() for head in headers] - headers = [head for head in headers if head] - text_data.append(' | '.join(headers)) - text_data.append(' | '.join(['---'] * len(headers))) + text_data.append('| ' + ' | '.join(headers) + ' |') + text_data.append('| ' + ' | '.join(['---'] * len(headers)) + ' |') elif len(columns) > 0: columns = [colm.text.strip() for colm in columns] - text_data.append(' | '.join(columns)) + text_data.append('| ' + ' | '.join(columns) + ' |') else: continue return '\n'.join(text_data) diff --git a/tests/test_conversions.py b/tests/test_conversions.py index de3307f..bf09506 100644 --- a/tests/test_conversions.py +++ b/tests/test_conversions.py @@ -71,7 +71,7 @@ table_missing_text = re.sub(r'\s+', '', """
Firstname Lastname Age
JillSmith 50
- + @@ -289,6 +289,6 @@ def test_div(): def test_table(): - assert md(table) == 'Firstname | Lastname | Age\n--- | --- | ---\nJill | Smith | 50\nEve | Jackson | 94' - assert md(table_head_body) == 'Firstname | Lastname | Age\n--- | --- | ---\nJill | Smith | 50\nEve | Jackson | 94' - assert md(table_missing_text) == 'Firstname | Lastname | Age\n--- | --- | ---\nJill | | 50\nEve | Jackson | 94' + assert md(table) == '| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |' + assert md(table_head_body) == '| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |' + assert md(table_missing_text) == '| | Lastname | Age |\n| --- | --- | --- |\n| Jill | | 50 |\n| Eve | Jackson | 94 |' From 651d5f00e88d38d582ddfbecb28621494d653dec Mon Sep 17 00:00:00 2001 From: AlexVonB Date: Thu, 22 Apr 2021 12:43:17 +0200 Subject: [PATCH 8/8] bump to v0.7.0 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index db71182..61d07ec 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ read = lambda filepath: codecs.open(filepath, 'r', 'utf-8').read() pkgmeta = { '__title__': 'markdownify', '__author__': 'Matthew Tretter', - '__version__': '0.6.6', + '__version__': '0.7.0', }
Firstname Lastname Age