# Improved handling of "first row, not headline": works for tables with
# (1) neither thead nor tbody, or (2) tbody but no thead.
172 lines
3.9 KiB
Python
172 lines
3.9 KiB
Python
from markdownify import markdownify as md
|
|
|
|
|
|
# Baseline fixture: a bare <table> (no <thead>/<tbody>) whose first row is
# made of <th> cells, followed by two rows of plain-text <td> cells.
# Stray "|" separator lines that had leaked into the literal were removed;
# inside the string they would have been parsed as text nodes of the table.
table = """<table>
<tr>
<th>Firstname</th>
<th>Lastname</th>
<th>Age</th>
</tr>
<tr>
<td>Jill</td>
<td>Smith</td>
<td>50</td>
</tr>
<tr>
<td>Eve</td>
<td>Jackson</td>
<td>94</td>
</tr>
</table>"""
|
|
|
|
|
|
# Fixture whose first data row wraps its cell text in inline markup
# (<b>, <i>, <a href="#">); the header row and last row are plain text.
# Stray "|" separator lines that had leaked into the literal were removed;
# inside the string they would have been parsed as text nodes of the table.
table_with_html_content = """<table>
<tr>
<th>Firstname</th>
<th>Lastname</th>
<th>Age</th>
</tr>
<tr>
<td><b>Jill</b></td>
<td><i>Smith</i></td>
<td><a href="#">50</a></td>
</tr>
<tr>
<td>Eve</td>
<td>Jackson</td>
<td>94</td>
</tr>
</table>"""
|
|
|
|
|
|
# Fixture in which some cells wrap their text in <p>: the "Lastname" header
# cell and every cell of the first data row. The last row is plain text.
# Stray "|" separator lines that had leaked into the literal were removed;
# inside the string they would have been parsed as text nodes of the table.
table_with_paragraphs = """<table>
<tr>
<th>Firstname</th>
<th><p>Lastname</p></th>
<th>Age</th>
</tr>
<tr>
<td><p>Jill</p></td>
<td><p>Smith</p></td>
<td><p>50</p></td>
</tr>
<tr>
<td>Eve</td>
<td>Jackson</td>
<td>94</td>
</tr>
</table>"""
|
|
|
|
|
|
# Fixture with a header column as well as a header row: the first cell of
# each data row is a <th> ("Jill", "Eve") instead of a <td>.
# Stray "|" separator lines that had leaked into the literal were removed;
# inside the string they would have been parsed as text nodes of the table.
table_with_header_column = """<table>
<tr>
<th>Firstname</th>
<th>Lastname</th>
<th>Age</th>
</tr>
<tr>
<th>Jill</th>
<td>Smith</td>
<td>50</td>
</tr>
<tr>
<th>Eve</th>
<td>Jackson</td>
<td>94</td>
</tr>
</table>"""
|
|
|
|
|
|
# Fixture with explicit sections: the <th> header row lives in <thead> and
# the two data rows live in <tbody>.
# Stray "|" separator lines that had leaked into the literal were removed;
# inside the string they would have been parsed as text nodes of the table.
table_head_body = """<table>
<thead>
<tr>
<th>Firstname</th>
<th>Lastname</th>
<th>Age</th>
</tr>
</thead>
<tbody>
<tr>
<td>Jill</td>
<td>Smith</td>
<td>50</td>
</tr>
<tr>
<td>Eve</td>
<td>Jackson</td>
<td>94</td>
</tr>
</tbody>
</table>"""
|
|
|
|
# Fixture with <thead>/<tbody> sections and two empty cells: the first header
# cell (<th></th>) and the "Lastname" cell of the first data row (<td></td>).
# Stray "|" separator lines that had leaked into the literal were removed;
# inside the string they would have been parsed as text nodes of the table.
table_missing_text = """<table>
<thead>
<tr>
<th></th>
<th>Lastname</th>
<th>Age</th>
</tr>
</thead>
<tbody>
<tr>
<td>Jill</td>
<td></td>
<td>50</td>
</tr>
<tr>
<td>Eve</td>
<td>Jackson</td>
<td>94</td>
</tr>
</tbody>
</table>"""
|
|
|
|
# Fixture with neither <thead> nor <tbody> and no <th> cells at all: the
# first row ("Firstname", "Lastname", "Age") is made of ordinary <td> cells.
# Stray "|" separator lines that had leaked into the literal were removed;
# inside the string they would have been parsed as text nodes of the table.
table_missing_head = """<table>
<tr>
<td>Firstname</td>
<td>Lastname</td>
<td>Age</td>
</tr>
<tr>
<td>Jill</td>
<td>Smith</td>
<td>50</td>
</tr>
<tr>
<td>Eve</td>
<td>Jackson</td>
<td>94</td>
</tr>
</table>"""
|
|
|
|
# Fixture with a <tbody> but no <thead> and no <th> cells: all three rows,
# including the "Firstname"/"Lastname"/"Age" row, are <td> rows in <tbody>.
# Stray "|" separator lines that had leaked into the literal were removed;
# inside the string they would have been parsed as text nodes of the table.
table_body = """<table>
<tbody>
<tr>
<td>Firstname</td>
<td>Lastname</td>
<td>Age</td>
</tr>
<tr>
<td>Jill</td>
<td>Smith</td>
<td>50</td>
</tr>
<tr>
<td>Eve</td>
<td>Jackson</td>
<td>94</td>
</tr>
</tbody>
</table>"""
|
|
|
|
|
|
def test_table():
    """Compare ``md`` output with the expected Markdown for each table fixture.

    Fixtures that are expected to render identically share one expected
    string, so the intended equivalence between them is visible at a glance.
    """
    # Header row, separator row, then the plain Jill/Eve data rows.
    with_header = '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
    # No header cells in the input: an empty header row comes first and the
    # original first row is kept as ordinary data.
    # NOTE(review): empty cells appear as a single space between pipes in the
    # expected strings of this test - confirm this matches md's real output
    # (runs of spaces may have been collapsed when this file was copied).
    without_header = '\n\n| | | |\n| --- | --- | --- |\n| Firstname | Lastname | Age |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'

    assert md(table) == with_header
    assert md(table_with_html_content) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| **Jill** | *Smith* | [50](#) |\n| Eve | Jackson | 94 |\n\n'
    assert md(table_with_paragraphs) == with_header
    assert md(table_with_header_column) == with_header
    assert md(table_head_body) == with_header
    assert md(table_missing_text) == '\n\n| | Lastname | Age |\n| --- | --- | --- |\n| Jill | | 50 |\n| Eve | Jackson | 94 |\n\n'
    assert md(table_missing_head) == without_header
    assert md(table_body) == without_header
|