From 7908f1492a466e72d4ade97161dae0bcadaaa489 Mon Sep 17 00:00:00 2001 From: "Stephen V. Brown" Date: Tue, 4 Mar 2025 20:01:16 -0500 Subject: [PATCH] Generalize handling of colspan in case where colspan is in first row but header row is missing (#203) --- markdownify/__init__.py | 16 ++++++++-------- tests/test_tables.py | 19 +++++++++++++++++++ 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/markdownify/__init__.py b/markdownify/__init__.py index ac34ee5..7f69bfe 100644 --- a/markdownify/__init__.py +++ b/markdownify/__init__.py @@ -701,6 +701,12 @@ class MarkdownConverter(object): ) overline = '' underline = '' + full_colspan = 0 + for cell in cells: + if 'colspan' in cell.attrs and cell['colspan'].isdigit(): + full_colspan += int(cell["colspan"]) + else: + full_colspan += 1 if ((is_headrow or (is_head_row_missing and self.options['table_infer_header'])) @@ -709,12 +715,6 @@ class MarkdownConverter(object): # - is headline or # - headline is missing and header inference is enabled # print headline underline - full_colspan = 0 - for cell in cells: - if 'colspan' in cell.attrs and cell['colspan'].isdigit(): - full_colspan += int(cell["colspan"]) - else: - full_colspan += 1 underline += '| ' + ' | '.join(['---'] * full_colspan) + ' |' + '\n' elif ((is_head_row_missing and not self.options['table_infer_header']) @@ -727,8 +727,8 @@ class MarkdownConverter(object): # - the parent is table or # - the parent is tbody at the beginning of a table. # print empty headline above this row - overline += '| ' + ' | '.join([''] * len(cells)) + ' |' + '\n' - overline += '| ' + ' | '.join(['---'] * len(cells)) + ' |' + '\n' + overline += '| ' + ' | '.join([''] * full_colspan) + ' |' + '\n' + overline += '| ' + ' | '.join(['---'] * full_colspan) + ' |' + '\n' return overline + '|' + text + '\n' + underline diff --git a/tests/test_tables.py b/tests/test_tables.py index e41b389..7e0670c 100644 --- a/tests/test_tables.py +++ b/tests/test_tables.py @@ -267,6 +267,23 @@ table_with_undefined_colspan = """
""" +table_with_colspan_missing_head = """ + + + + + + + + + + + + + + +
NameAge
JillSmith50
EveJackson94
""" + def test_table(): assert md(table) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' @@ -283,6 +300,7 @@ def test_table(): assert md(table_with_caption) == 'TEXT\n\nCaption\n\n| | | |\n| --- | --- | --- |\n| Firstname | Lastname | Age |\n\n' assert md(table_with_colspan) == '\n\n| Name | | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' assert md(table_with_undefined_colspan) == '\n\n| Name | Age |\n| --- | --- |\n| Jill | Smith |\n\n' + assert md(table_with_colspan_missing_head) == '\n\n| | | |\n| --- | --- | --- |\n| Name | | Age |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' def test_table_infer_header(): @@ -300,3 +318,4 @@ def test_table_infer_header(): assert md(table_with_caption, table_infer_header=True) == 'TEXT\n\nCaption\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n\n' assert md(table_with_colspan, table_infer_header=True) == '\n\n| Name | | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' assert md(table_with_undefined_colspan, table_infer_header=True) == '\n\n| Name | Age |\n| --- | --- |\n| Jill | Smith |\n\n' + assert md(table_with_colspan_missing_head, table_infer_header=True) == '\n\n| Name | | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'