Compare commits
6 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
26566891a7 | ||
|
|
13183f9925 | ||
|
|
7908f1492a | ||
|
|
618747c18c | ||
|
|
5122c973c1 | ||
|
|
ac5736f0a3 |
@@ -106,6 +106,7 @@ def should_remove_whitespace_inside(el):
|
|||||||
return el.name in ('p', 'blockquote',
|
return el.name in ('p', 'blockquote',
|
||||||
'article', 'div', 'section',
|
'article', 'div', 'section',
|
||||||
'ol', 'ul', 'li',
|
'ol', 'ul', 'li',
|
||||||
|
'dl', 'dt', 'dd',
|
||||||
'table', 'thead', 'tbody', 'tfoot',
|
'table', 'thead', 'tbody', 'tfoot',
|
||||||
'tr', 'td', 'th')
|
'tr', 'td', 'th')
|
||||||
|
|
||||||
@@ -442,7 +443,7 @@ class MarkdownConverter(object):
|
|||||||
|
|
||||||
def convert_br(self, el, text, parent_tags):
|
def convert_br(self, el, text, parent_tags):
|
||||||
if '_inline' in parent_tags:
|
if '_inline' in parent_tags:
|
||||||
return ""
|
return ' '
|
||||||
|
|
||||||
if self.options['newline_style'].lower() == BACKSLASH:
|
if self.options['newline_style'].lower() == BACKSLASH:
|
||||||
return '\\\n'
|
return '\\\n'
|
||||||
@@ -489,6 +490,11 @@ class MarkdownConverter(object):
|
|||||||
|
|
||||||
return '%s\n' % text
|
return '%s\n' % text
|
||||||
|
|
||||||
|
# definition lists are formatted as follows:
|
||||||
|
# https://pandoc.org/MANUAL.html#definition-lists
|
||||||
|
# https://michelf.ca/projects/php-markdown/extra/#def-list
|
||||||
|
convert_dl = convert_div
|
||||||
|
|
||||||
def convert_dt(self, el, text, parent_tags):
|
def convert_dt(self, el, text, parent_tags):
|
||||||
# remove newlines from term text
|
# remove newlines from term text
|
||||||
text = (text or '').strip()
|
text = (text or '').strip()
|
||||||
@@ -501,7 +507,7 @@ class MarkdownConverter(object):
|
|||||||
# TODO - format consecutive <dt> elements as directly adjacent lines):
|
# TODO - format consecutive <dt> elements as directly adjacent lines):
|
||||||
# https://michelf.ca/projects/php-markdown/extra/#def-list
|
# https://michelf.ca/projects/php-markdown/extra/#def-list
|
||||||
|
|
||||||
return '\n%s\n' % text
|
return '\n\n%s\n' % text
|
||||||
|
|
||||||
def _convert_hn(self, n, el, text, parent_tags):
|
def _convert_hn(self, n, el, text, parent_tags):
|
||||||
""" Method name prefixed with _ to prevent <hn> to call this """
|
""" Method name prefixed with _ to prevent <hn> to call this """
|
||||||
@@ -538,6 +544,24 @@ class MarkdownConverter(object):
|
|||||||
|
|
||||||
return '' % (alt, src, title_part)
|
return '' % (alt, src, title_part)
|
||||||
|
|
||||||
|
def convert_video(self, el, text, parent_tags):
    """Convert a ``<video>`` element to a Markdown link and/or image.

    The video URL is taken from the element's own ``src`` attribute or,
    when that is missing or empty, from the first nested ``<source>``
    element that carries a ``src`` attribute. The ``poster`` attribute,
    when present, is used as an image.

    Returns:
        - ``[![text](poster)](src)`` when both a source and a poster exist,
        - ``[text](src)`` when only a source exists,
        - ``![text](poster)`` when only a poster exists,
        - ``text`` unchanged otherwise, or when the element is in an inline
          context and its parent tag is not listed in the
          ``keep_inline_images_in`` option.
    """
    if ('_inline' in parent_tags
            and el.parent.name not in self.options['keep_inline_images_in']):
        return text

    src = el.attrs.get('src') or ''
    if not src:
        # Fall back to the first <source src="..."> child.
        sources = el.find_all('source', attrs={'src': True})
        if sources:
            src = sources[0].attrs.get('src') or ''
    poster = el.attrs.get('poster') or ''

    # Every format string must have exactly one placeholder per argument;
    # the poster is rendered with Markdown image syntax.
    if src and poster:
        return '[![%s](%s)](%s)' % (text, poster, src)
    if src:
        return '[%s](%s)' % (text, src)
    if poster:
        return '![%s](%s)' % (text, poster)
    return text
|
||||||
|
|
||||||
def convert_list(self, el, text, parent_tags):
|
def convert_list(self, el, text, parent_tags):
|
||||||
|
|
||||||
# Converting a list to inline is undefined.
|
# Converting a list to inline is undefined.
|
||||||
@@ -677,6 +701,12 @@ class MarkdownConverter(object):
|
|||||||
)
|
)
|
||||||
overline = ''
|
overline = ''
|
||||||
underline = ''
|
underline = ''
|
||||||
|
full_colspan = 0
|
||||||
|
for cell in cells:
|
||||||
|
if 'colspan' in cell.attrs and cell['colspan'].isdigit():
|
||||||
|
full_colspan += int(cell["colspan"])
|
||||||
|
else:
|
||||||
|
full_colspan += 1
|
||||||
if ((is_headrow
|
if ((is_headrow
|
||||||
or (is_head_row_missing
|
or (is_head_row_missing
|
||||||
and self.options['table_infer_header']))
|
and self.options['table_infer_header']))
|
||||||
@@ -685,12 +715,6 @@ class MarkdownConverter(object):
|
|||||||
# - is headline or
|
# - is headline or
|
||||||
# - headline is missing and header inference is enabled
|
# - headline is missing and header inference is enabled
|
||||||
# print headline underline
|
# print headline underline
|
||||||
full_colspan = 0
|
|
||||||
for cell in cells:
|
|
||||||
if 'colspan' in cell.attrs and cell['colspan'].isdigit():
|
|
||||||
full_colspan += int(cell["colspan"])
|
|
||||||
else:
|
|
||||||
full_colspan += 1
|
|
||||||
underline += '| ' + ' | '.join(['---'] * full_colspan) + ' |' + '\n'
|
underline += '| ' + ' | '.join(['---'] * full_colspan) + ' |' + '\n'
|
||||||
elif ((is_head_row_missing
|
elif ((is_head_row_missing
|
||||||
and not self.options['table_infer_header'])
|
and not self.options['table_infer_header'])
|
||||||
@@ -703,8 +727,8 @@ class MarkdownConverter(object):
|
|||||||
# - the parent is table or
|
# - the parent is table or
|
||||||
# - the parent is tbody at the beginning of a table.
|
# - the parent is tbody at the beginning of a table.
|
||||||
# print empty headline above this row
|
# print empty headline above this row
|
||||||
overline += '| ' + ' | '.join([''] * len(cells)) + ' |' + '\n'
|
overline += '| ' + ' | '.join([''] * full_colspan) + ' |' + '\n'
|
||||||
overline += '| ' + ' | '.join(['---'] * len(cells)) + ' |' + '\n'
|
overline += '| ' + ' | '.join(['---'] * full_colspan) + ' |' + '\n'
|
||||||
return overline + '|' + text + '\n' + underline
|
return overline + '|' + text + '\n' + underline
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "markdownify"
|
name = "markdownify"
|
||||||
version = "1.0.0"
|
version = "1.1.0"
|
||||||
authors = [{name = "Matthew Tretter", email = "m@tthewwithanm.com"}]
|
authors = [{name = "Matthew Tretter", email = "m@tthewwithanm.com"}]
|
||||||
description = "Convert HTML to markdown."
|
description = "Convert HTML to markdown."
|
||||||
readme = "README.rst"
|
readme = "README.rst"
|
||||||
|
|||||||
@@ -79,6 +79,8 @@ def test_blockquote_nested():
|
|||||||
def test_br():
|
def test_br():
|
||||||
assert md('a<br />b<br />c') == 'a \nb \nc'
|
assert md('a<br />b<br />c') == 'a \nb \nc'
|
||||||
assert md('a<br />b<br />c', newline_style=BACKSLASH) == 'a\\\nb\\\nc'
|
assert md('a<br />b<br />c', newline_style=BACKSLASH) == 'a\\\nb\\\nc'
|
||||||
|
assert md('<h1>foo<br />bar</h1>', heading_style=ATX) == '\n\n# foo bar\n\n'
|
||||||
|
assert md('<td>foo<br />bar</td>', heading_style=ATX) == ' foo bar |'
|
||||||
|
|
||||||
|
|
||||||
def test_code():
|
def test_code():
|
||||||
@@ -102,13 +104,13 @@ def test_code():
|
|||||||
|
|
||||||
|
|
||||||
def test_dl():
|
def test_dl():
|
||||||
assert md('<dl><dt>term</dt><dd>definition</dd></dl>') == '\nterm\n: definition\n'
|
assert md('<dl><dt>term</dt><dd>definition</dd></dl>') == '\n\nterm\n: definition\n\n'
|
||||||
assert md('<dl><dt><p>te</p><p>rm</p></dt><dd>definition</dd></dl>') == '\nte rm\n: definition\n'
|
assert md('<dl><dt><p>te</p><p>rm</p></dt><dd>definition</dd></dl>') == '\n\nte rm\n: definition\n\n'
|
||||||
assert md('<dl><dt>term</dt><dd><p>definition-p1</p><p>definition-p2</p></dd></dl>') == '\nterm\n: definition-p1\n\n definition-p2\n'
|
assert md('<dl><dt>term</dt><dd><p>definition-p1</p><p>definition-p2</p></dd></dl>') == '\n\nterm\n: definition-p1\n\n definition-p2\n\n'
|
||||||
assert md('<dl><dt>term</dt><dd><p>definition 1</p></dd><dd><p>definition 2</p></dd></dl>') == '\nterm\n: definition 1\n: definition 2\n'
|
assert md('<dl><dt>term</dt><dd><p>definition 1</p></dd><dd><p>definition 2</p></dd></dl>') == '\n\nterm\n: definition 1\n: definition 2\n\n'
|
||||||
assert md('<dl><dt>term 1</dt><dd>definition 1</dd><dt>term 2</dt><dd>definition 2</dd></dl>') == '\nterm 1\n: definition 1\nterm 2\n: definition 2\n'
|
assert md('<dl><dt>term 1</dt><dd>definition 1</dd><dt>term 2</dt><dd>definition 2</dd></dl>') == '\n\nterm 1\n: definition 1\n\nterm 2\n: definition 2\n\n'
|
||||||
assert md('<dl><dt>term</dt><dd><blockquote><p>line 1</p><p>line 2</p></blockquote></dd></dl>') == '\nterm\n: > line 1\n >\n > line 2\n'
|
assert md('<dl><dt>term</dt><dd><blockquote><p>line 1</p><p>line 2</p></blockquote></dd></dl>') == '\n\nterm\n: > line 1\n >\n > line 2\n\n'
|
||||||
assert md('<dl><dt>term</dt><dd><ol><li><p>1</p><ul><li>2a</li><li>2b</li></ul></li><li><p>3</p></li></ol></dd></dl>') == '\nterm\n: 1. 1\n\n * 2a\n * 2b\n 2. 3\n'
|
assert md('<dl><dt>term</dt><dd><ol><li><p>1</p><ul><li>2a</li><li>2b</li></ul></li><li><p>3</p></li></ol></dd></dl>') == '\n\nterm\n: 1. 1\n\n * 2a\n * 2b\n 2. 3\n\n'
|
||||||
|
|
||||||
|
|
||||||
def test_del():
|
def test_del():
|
||||||
@@ -243,6 +245,14 @@ def test_img():
|
|||||||
assert md('<img src="/path/to/img.jpg" alt="Alt text" />') == ''
|
assert md('<img src="/path/to/img.jpg" alt="Alt text" />') == ''
|
||||||
|
|
||||||
|
|
||||||
|
def test_video():
    # src + poster: poster image wrapped in a link to the video
    assert md('<video src="/path/to/video.mp4" poster="/path/to/img.jpg">text</video>') == '[![text](/path/to/img.jpg)](/path/to/video.mp4)'
    # src only (on the element itself or on a nested <source>): plain link
    assert md('<video src="/path/to/video.mp4">text</video>') == '[text](/path/to/video.mp4)'
    assert md('<video><source src="/path/to/video.mp4"/>text</video>') == '[text](/path/to/video.mp4)'
    # poster only: plain image
    assert md('<video poster="/path/to/img.jpg">text</video>') == '![text](/path/to/img.jpg)'
    # neither: the fallback text is passed through unchanged
    assert md('<video>text</video>') == 'text'
|
||||||
|
|
||||||
|
|
||||||
def test_kbd():
|
def test_kbd():
|
||||||
inline_tests('kbd', '`')
|
inline_tests('kbd', '`')
|
||||||
|
|
||||||
|
|||||||
@@ -267,6 +267,23 @@ table_with_undefined_colspan = """<table>
|
|||||||
</tr>
|
</tr>
|
||||||
</table>"""
|
</table>"""
|
||||||
|
|
||||||
|
table_with_colspan_missing_head = """<table>
|
||||||
|
<tr>
|
||||||
|
<td colspan="2">Name</td>
|
||||||
|
<td>Age</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>Jill</td>
|
||||||
|
<td>Smith</td>
|
||||||
|
<td>50</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>Eve</td>
|
||||||
|
<td>Jackson</td>
|
||||||
|
<td>94</td>
|
||||||
|
</tr>
|
||||||
|
</table>"""
|
||||||
|
|
||||||
|
|
||||||
def test_table():
|
def test_table():
|
||||||
assert md(table) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
|
assert md(table) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
|
||||||
@@ -283,6 +300,7 @@ def test_table():
|
|||||||
assert md(table_with_caption) == 'TEXT\n\nCaption\n\n| | | |\n| --- | --- | --- |\n| Firstname | Lastname | Age |\n\n'
|
assert md(table_with_caption) == 'TEXT\n\nCaption\n\n| | | |\n| --- | --- | --- |\n| Firstname | Lastname | Age |\n\n'
|
||||||
assert md(table_with_colspan) == '\n\n| Name | | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
|
assert md(table_with_colspan) == '\n\n| Name | | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
|
||||||
assert md(table_with_undefined_colspan) == '\n\n| Name | Age |\n| --- | --- |\n| Jill | Smith |\n\n'
|
assert md(table_with_undefined_colspan) == '\n\n| Name | Age |\n| --- | --- |\n| Jill | Smith |\n\n'
|
||||||
|
assert md(table_with_colspan_missing_head) == '\n\n| | | |\n| --- | --- | --- |\n| Name | | Age |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
|
||||||
|
|
||||||
|
|
||||||
def test_table_infer_header():
|
def test_table_infer_header():
|
||||||
@@ -300,3 +318,4 @@ def test_table_infer_header():
|
|||||||
assert md(table_with_caption, table_infer_header=True) == 'TEXT\n\nCaption\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n\n'
|
assert md(table_with_caption, table_infer_header=True) == 'TEXT\n\nCaption\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n\n'
|
||||||
assert md(table_with_colspan, table_infer_header=True) == '\n\n| Name | | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
|
assert md(table_with_colspan, table_infer_header=True) == '\n\n| Name | | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
|
||||||
assert md(table_with_undefined_colspan, table_infer_header=True) == '\n\n| Name | Age |\n| --- | --- |\n| Jill | Smith |\n\n'
|
assert md(table_with_undefined_colspan, table_infer_header=True) == '\n\n| Name | Age |\n| --- | --- |\n| Jill | Smith |\n\n'
|
||||||
|
assert md(table_with_colspan_missing_head, table_infer_header=True) == '\n\n| Name | | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'
|
||||||
|
|||||||
Reference in New Issue
Block a user