Compare commits

..

6 Commits
1.0.0 ... 1.1.0

Author SHA1 Message Date
chrispy
26566891a7 Merge branch 'develop' 2025-03-05 06:48:47 -05:00
chrispy
13183f9925 bump to version v1.1.0
Signed-off-by: chrispy <chrispy@synopsys.com>
2025-03-05 06:47:28 -05:00
Stephen V. Brown
7908f1492a Generalize handling of colspan in case where colspan is in first row but header row is missing (#203) 2025-03-04 20:01:16 -05:00
Chris Papademetrious
618747c18c in inline contexts, resolve <br/> to a space instead of an empty string (#202)
Signed-off-by: chrispy <chrispy@synopsys.com>
2025-03-04 07:37:22 -05:00
Chris Papademetrious
5122c973c1 add missing newlines for definition lists (#200)
Signed-off-by: chrispy <chrispy@synopsys.com>
2025-03-02 06:42:56 -05:00
itmammoth
ac5736f0a3 Support video tag with poster attribute (#189) 2025-02-28 10:51:42 +01:00
4 changed files with 71 additions and 18 deletions

View File

@@ -106,6 +106,7 @@ def should_remove_whitespace_inside(el):
return el.name in ('p', 'blockquote', return el.name in ('p', 'blockquote',
'article', 'div', 'section', 'article', 'div', 'section',
'ol', 'ul', 'li', 'ol', 'ul', 'li',
'dl', 'dt', 'dd',
'table', 'thead', 'tbody', 'tfoot', 'table', 'thead', 'tbody', 'tfoot',
'tr', 'td', 'th') 'tr', 'td', 'th')
@@ -442,7 +443,7 @@ class MarkdownConverter(object):
def convert_br(self, el, text, parent_tags): def convert_br(self, el, text, parent_tags):
if '_inline' in parent_tags: if '_inline' in parent_tags:
return "" return ' '
if self.options['newline_style'].lower() == BACKSLASH: if self.options['newline_style'].lower() == BACKSLASH:
return '\\\n' return '\\\n'
@@ -489,6 +490,11 @@ class MarkdownConverter(object):
return '%s\n' % text return '%s\n' % text
# Definition lists (<dl>/<dt>/<dd>) are formatted as follows:
# https://pandoc.org/MANUAL.html#definition-lists
# https://michelf.ca/projects/php-markdown/extra/#def-list
#
# The <dl> container itself has no dedicated converter: it is aliased to
# convert_div, so whatever convert_div emits for a <div> is also emitted
# for a <dl>.
convert_dl = convert_div
def convert_dt(self, el, text, parent_tags): def convert_dt(self, el, text, parent_tags):
# remove newlines from term text # remove newlines from term text
text = (text or '').strip() text = (text or '').strip()
@@ -501,7 +507,7 @@ class MarkdownConverter(object):
# TODO - format consecutive <dt> elements as directly adjacent lines: # TODO - format consecutive <dt> elements as directly adjacent lines:
# https://michelf.ca/projects/php-markdown/extra/#def-list # https://michelf.ca/projects/php-markdown/extra/#def-list
return '\n%s\n' % text return '\n\n%s\n' % text
def _convert_hn(self, n, el, text, parent_tags): def _convert_hn(self, n, el, text, parent_tags):
""" Method name prefixed with _ to prevent <hn> to call this """ """ Method name prefixed with _ to prevent <hn> to call this """
@@ -538,6 +544,24 @@ class MarkdownConverter(object):
return '![%s](%s%s)' % (alt, src, title_part) return '![%s](%s%s)' % (alt, src, title_part)
def convert_video(self, el, text, parent_tags):
    """Convert a <video> element into a Markdown link and/or image.

    The video URL comes from the element's own ``src`` attribute or, when
    that is missing or empty, from the first nested <source> element that
    carries a non-empty ``src``.  The ``poster`` attribute, when present,
    supplies a preview image.

    :param el: the <video> element; it must expose ``attrs``, ``parent``
        and ``find_all``.
    :param text: the already-converted content of the element, used as
        the link text / image alt text.
    :param parent_tags: collection of ancestor markers; when it contains
        ``'_inline'`` and the parent tag is not listed in
        ``options['keep_inline_images_in']``, ``text`` is returned as-is.
    :returns: ``[![text](poster)](src)`` when both URL and poster exist,
        ``[text](src)`` for a URL only, ``![text](poster)`` for a poster
        only, and plain ``text`` when neither is available.
    """
    if ('_inline' in parent_tags
            and el.parent.name not in self.options['keep_inline_images_in']):
        return text

    src = el.attrs.get('src') or ''
    if not src:
        # The ``src: True`` filter only requires the attribute to be
        # present, so a <source src=""> can be returned first.  Skip such
        # entries instead of letting an empty value hide a later, usable
        # <source>.
        candidates = (
            source.attrs.get('src')
            for source in el.find_all('source', attrs={'src': True})
        )
        src = next((candidate for candidate in candidates if candidate), '')

    poster = el.attrs.get('poster') or ''

    if src and poster:
        # poster image that links to the video
        return '[![%s](%s)](%s)' % (text, poster, src)
    if src:
        return '[%s](%s)' % (text, src)
    if poster:
        return '![%s](%s)' % (text, poster)
    return text
def convert_list(self, el, text, parent_tags): def convert_list(self, el, text, parent_tags):
# Converting a list to inline is undefined. # Converting a list to inline is undefined.
@@ -677,6 +701,12 @@ class MarkdownConverter(object):
) )
overline = '' overline = ''
underline = '' underline = ''
full_colspan = 0
for cell in cells:
if 'colspan' in cell.attrs and cell['colspan'].isdigit():
full_colspan += int(cell["colspan"])
else:
full_colspan += 1
if ((is_headrow if ((is_headrow
or (is_head_row_missing or (is_head_row_missing
and self.options['table_infer_header'])) and self.options['table_infer_header']))
@@ -685,12 +715,6 @@ class MarkdownConverter(object):
# - is headline or # - is headline or
# - headline is missing and header inference is enabled # - headline is missing and header inference is enabled
# print headline underline # print headline underline
full_colspan = 0
for cell in cells:
if 'colspan' in cell.attrs and cell['colspan'].isdigit():
full_colspan += int(cell["colspan"])
else:
full_colspan += 1
underline += '| ' + ' | '.join(['---'] * full_colspan) + ' |' + '\n' underline += '| ' + ' | '.join(['---'] * full_colspan) + ' |' + '\n'
elif ((is_head_row_missing elif ((is_head_row_missing
and not self.options['table_infer_header']) and not self.options['table_infer_header'])
@@ -703,8 +727,8 @@ class MarkdownConverter(object):
# - the parent is table or # - the parent is table or
# - the parent is tbody at the beginning of a table. # - the parent is tbody at the beginning of a table.
# print empty headline above this row # print empty headline above this row
overline += '| ' + ' | '.join([''] * len(cells)) + ' |' + '\n' overline += '| ' + ' | '.join([''] * full_colspan) + ' |' + '\n'
overline += '| ' + ' | '.join(['---'] * len(cells)) + ' |' + '\n' overline += '| ' + ' | '.join(['---'] * full_colspan) + ' |' + '\n'
return overline + '|' + text + '\n' + underline return overline + '|' + text + '\n' + underline

View File

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project] [project]
name = "markdownify" name = "markdownify"
version = "1.0.0" version = "1.1.0"
authors = [{name = "Matthew Tretter", email = "m@tthewwithanm.com"}] authors = [{name = "Matthew Tretter", email = "m@tthewwithanm.com"}]
description = "Convert HTML to markdown." description = "Convert HTML to markdown."
readme = "README.rst" readme = "README.rst"

View File

@@ -79,6 +79,8 @@ def test_blockquote_nested():
def test_br(): def test_br():
assert md('a<br />b<br />c') == 'a \nb \nc' assert md('a<br />b<br />c') == 'a \nb \nc'
assert md('a<br />b<br />c', newline_style=BACKSLASH) == 'a\\\nb\\\nc' assert md('a<br />b<br />c', newline_style=BACKSLASH) == 'a\\\nb\\\nc'
assert md('<h1>foo<br />bar</h1>', heading_style=ATX) == '\n\n# foo bar\n\n'
assert md('<td>foo<br />bar</td>', heading_style=ATX) == ' foo bar |'
def test_code(): def test_code():
@@ -102,13 +104,13 @@ def test_code():
def test_dl(): def test_dl():
assert md('<dl><dt>term</dt><dd>definition</dd></dl>') == '\nterm\n: definition\n' assert md('<dl><dt>term</dt><dd>definition</dd></dl>') == '\n\nterm\n: definition\n\n'
assert md('<dl><dt><p>te</p><p>rm</p></dt><dd>definition</dd></dl>') == '\nte rm\n: definition\n' assert md('<dl><dt><p>te</p><p>rm</p></dt><dd>definition</dd></dl>') == '\n\nte rm\n: definition\n\n'
assert md('<dl><dt>term</dt><dd><p>definition-p1</p><p>definition-p2</p></dd></dl>') == '\nterm\n: definition-p1\n\n definition-p2\n' assert md('<dl><dt>term</dt><dd><p>definition-p1</p><p>definition-p2</p></dd></dl>') == '\n\nterm\n: definition-p1\n\n definition-p2\n\n'
assert md('<dl><dt>term</dt><dd><p>definition 1</p></dd><dd><p>definition 2</p></dd></dl>') == '\nterm\n: definition 1\n: definition 2\n' assert md('<dl><dt>term</dt><dd><p>definition 1</p></dd><dd><p>definition 2</p></dd></dl>') == '\n\nterm\n: definition 1\n: definition 2\n\n'
assert md('<dl><dt>term 1</dt><dd>definition 1</dd><dt>term 2</dt><dd>definition 2</dd></dl>') == '\nterm 1\n: definition 1\nterm 2\n: definition 2\n' assert md('<dl><dt>term 1</dt><dd>definition 1</dd><dt>term 2</dt><dd>definition 2</dd></dl>') == '\n\nterm 1\n: definition 1\n\nterm 2\n: definition 2\n\n'
assert md('<dl><dt>term</dt><dd><blockquote><p>line 1</p><p>line 2</p></blockquote></dd></dl>') == '\nterm\n: > line 1\n >\n > line 2\n' assert md('<dl><dt>term</dt><dd><blockquote><p>line 1</p><p>line 2</p></blockquote></dd></dl>') == '\n\nterm\n: > line 1\n >\n > line 2\n\n'
assert md('<dl><dt>term</dt><dd><ol><li><p>1</p><ul><li>2a</li><li>2b</li></ul></li><li><p>3</p></li></ol></dd></dl>') == '\nterm\n: 1. 1\n\n * 2a\n * 2b\n 2. 3\n' assert md('<dl><dt>term</dt><dd><ol><li><p>1</p><ul><li>2a</li><li>2b</li></ul></li><li><p>3</p></li></ol></dd></dl>') == '\n\nterm\n: 1. 1\n\n * 2a\n * 2b\n 2. 3\n\n'
def test_del(): def test_del():
@@ -243,6 +245,14 @@ def test_img():
assert md('<img src="/path/to/img.jpg" alt="Alt text" />') == '![Alt text](/path/to/img.jpg)' assert md('<img src="/path/to/img.jpg" alt="Alt text" />') == '![Alt text](/path/to/img.jpg)'
def test_video():
    """Check each <video> rendering: src+poster, src only, nested <source>, poster only, neither."""
    expectations = (
        ('<video src="/path/to/video.mp4" poster="/path/to/img.jpg">text</video>',
         '[![text](/path/to/img.jpg)](/path/to/video.mp4)'),
        ('<video src="/path/to/video.mp4">text</video>',
         '[text](/path/to/video.mp4)'),
        ('<video><source src="/path/to/video.mp4"/>text</video>',
         '[text](/path/to/video.mp4)'),
        ('<video poster="/path/to/img.jpg">text</video>',
         '![text](/path/to/img.jpg)'),
        ('<video>text</video>',
         'text'),
    )
    for html, markdown in expectations:
        assert md(html) == markdown
def test_kbd(): def test_kbd():
inline_tests('kbd', '`') inline_tests('kbd', '`')

View File

@@ -267,6 +267,23 @@ table_with_undefined_colspan = """<table>
</tr> </tr>
</table>""" </table>"""
# Fixture: a table built only from <tr>/<td> rows (no <th> cells and no
# <thead>), whose first row contains a cell with colspan="2" followed by a
# single cell, while the remaining rows have three plain cells each.
table_with_colspan_missing_head = """<table>
<tr>
<td colspan="2">Name</td>
<td>Age</td>
</tr>
<tr>
<td>Jill</td>
<td>Smith</td>
<td>50</td>
</tr>
<tr>
<td>Eve</td>
<td>Jackson</td>
<td>94</td>
</tr>
</table>"""
def test_table(): def test_table():
assert md(table) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' assert md(table) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
@@ -283,6 +300,7 @@ def test_table():
assert md(table_with_caption) == 'TEXT\n\nCaption\n\n| | | |\n| --- | --- | --- |\n| Firstname | Lastname | Age |\n\n' assert md(table_with_caption) == 'TEXT\n\nCaption\n\n| | | |\n| --- | --- | --- |\n| Firstname | Lastname | Age |\n\n'
assert md(table_with_colspan) == '\n\n| Name | | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' assert md(table_with_colspan) == '\n\n| Name | | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
assert md(table_with_undefined_colspan) == '\n\n| Name | Age |\n| --- | --- |\n| Jill | Smith |\n\n' assert md(table_with_undefined_colspan) == '\n\n| Name | Age |\n| --- | --- |\n| Jill | Smith |\n\n'
assert md(table_with_colspan_missing_head) == '\n\n| | | |\n| --- | --- | --- |\n| Name | | Age |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
def test_table_infer_header(): def test_table_infer_header():
@@ -300,3 +318,4 @@ def test_table_infer_header():
assert md(table_with_caption, table_infer_header=True) == 'TEXT\n\nCaption\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n\n' assert md(table_with_caption, table_infer_header=True) == 'TEXT\n\nCaption\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n\n'
assert md(table_with_colspan, table_infer_header=True) == '\n\n| Name | | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' assert md(table_with_colspan, table_infer_header=True) == '\n\n| Name | | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
assert md(table_with_undefined_colspan, table_infer_header=True) == '\n\n| Name | Age |\n| --- | --- |\n| Jill | Smith |\n\n' assert md(table_with_undefined_colspan, table_infer_header=True) == '\n\n| Name | Age |\n| --- | --- |\n| Jill | Smith |\n\n'
assert md(table_with_colspan_missing_head, table_infer_header=True) == '\n\n| Name | | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'