diff --git a/markdownify/__init__.py b/markdownify/__init__.py index 5d21506..7f69bfe 100644 --- a/markdownify/__init__.py +++ b/markdownify/__init__.py @@ -106,6 +106,7 @@ def should_remove_whitespace_inside(el): return el.name in ('p', 'blockquote', 'article', 'div', 'section', 'ol', 'ul', 'li', + 'dl', 'dt', 'dd', 'table', 'thead', 'tbody', 'tfoot', 'tr', 'td', 'th') @@ -442,7 +443,7 @@ class MarkdownConverter(object): def convert_br(self, el, text, parent_tags): if '_inline' in parent_tags: - return "" + return ' ' if self.options['newline_style'].lower() == BACKSLASH: return '\\\n' @@ -489,6 +490,11 @@ class MarkdownConverter(object): return '%s\n' % text + # definition lists are formatted as follows: + # https://pandoc.org/MANUAL.html#definition-lists + # https://michelf.ca/projects/php-markdown/extra/#def-list + convert_dl = convert_div + def convert_dt(self, el, text, parent_tags): # remove newlines from term text text = (text or '').strip() @@ -501,7 +507,7 @@ class MarkdownConverter(object): # TODO - format consecutive
elements as directly adjacent lines): # https://michelf.ca/projects/php-markdown/extra/#def-list - return '\n%s\n' % text + return '\n\n%s\n' % text def _convert_hn(self, n, el, text, parent_tags): """ Method name prefixed with _ to prevent to call this """ @@ -538,6 +544,24 @@ class MarkdownConverter(object): return '![%s](%s%s)' % (alt, src, title_part) + def convert_video(self, el, text, parent_tags): + if ('_inline' in parent_tags + and el.parent.name not in self.options['keep_inline_images_in']): + return text + src = el.attrs.get('src', None) or '' + if not src: + sources = el.find_all('source', attrs={'src': True}) + if sources: + src = sources[0].attrs.get('src', None) or '' + poster = el.attrs.get('poster', None) or '' + if src and poster: + return '[![%s](%s)](%s)' % (text, poster, src) + if src: + return '[%s](%s)' % (text, src) + if poster: + return '![%s](%s)' % (text, poster) + return text + def convert_list(self, el, text, parent_tags): # Converting a list to inline is undefined. @@ -677,6 +701,12 @@ class MarkdownConverter(object): ) overline = '' underline = '' + full_colspan = 0 + for cell in cells: + if 'colspan' in cell.attrs and cell['colspan'].isdigit(): + full_colspan += int(cell["colspan"]) + else: + full_colspan += 1 if ((is_headrow or (is_head_row_missing and self.options['table_infer_header'])) @@ -685,12 +715,6 @@ class MarkdownConverter(object): # - is headline or # - headline is missing and header inference is enabled # print headline underline - full_colspan = 0 - for cell in cells: - if 'colspan' in cell.attrs and cell['colspan'].isdigit(): - full_colspan += int(cell["colspan"]) - else: - full_colspan += 1 underline += '| ' + ' | '.join(['---'] * full_colspan) + ' |' + '\n' elif ((is_head_row_missing and not self.options['table_infer_header']) @@ -703,8 +727,8 @@ class MarkdownConverter(object): # - the parent is table or # - the parent is tbody at the beginning of a table. # print empty headline above this row - overline += '| ' + ' | '.join([''] * len(cells)) + ' |' + '\n' - overline += '| ' + ' | '.join(['---'] * len(cells)) + ' |' + '\n' + overline += '| ' + ' | '.join([''] * full_colspan) + ' |' + '\n' + overline += '| ' + ' | '.join(['---'] * full_colspan) + ' |' + '\n' return overline + '|' + text + '\n' + underline diff --git a/pyproject.toml b/pyproject.toml index a6ae3e5..7306055 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "markdownify" -version = "1.0.0" +version = "1.1.0" authors = [{name = "Matthew Tretter", email = "m@tthewwithanm.com"}] description = "Convert HTML to markdown." readme = "README.rst" diff --git a/tests/test_conversions.py b/tests/test_conversions.py index e851ac2..0df8f57 100644 --- a/tests/test_conversions.py +++ b/tests/test_conversions.py @@ -79,6 +79,8 @@ def test_blockquote_nested(): def test_br(): assert md('a
b
c') == 'a \nb \nc' assert md('a
b
c', newline_style=BACKSLASH) == 'a\\\nb\\\nc' + assert md('

foo
bar

', heading_style=ATX) == '\n\n# foo bar\n\n' + assert md('foo
bar', heading_style=ATX) == ' foo bar |' def test_code(): @@ -102,13 +104,13 @@ def test_code(): def test_dl(): - assert md('
term
definition
') == '\nterm\n: definition\n' - assert md('

te

rm

definition
') == '\nte rm\n: definition\n' - assert md('
term

definition-p1

definition-p2

') == '\nterm\n: definition-p1\n\n definition-p2\n' - assert md('
term

definition 1

definition 2

') == '\nterm\n: definition 1\n: definition 2\n' - assert md('
term 1
definition 1
term 2
definition 2
') == '\nterm 1\n: definition 1\nterm 2\n: definition 2\n' - assert md('
term

line 1

line 2

') == '\nterm\n: > line 1\n >\n > line 2\n' - assert md('
term
  1. 1

    • 2a
    • 2b
  2. 3

') == '\nterm\n: 1. 1\n\n * 2a\n * 2b\n 2. 3\n' + assert md('
term
definition
') == '\n\nterm\n: definition\n\n' + assert md('

te

rm

definition
') == '\n\nte rm\n: definition\n\n' + assert md('
term

definition-p1

definition-p2

') == '\n\nterm\n: definition-p1\n\n definition-p2\n\n' + assert md('
term

definition 1

definition 2

') == '\n\nterm\n: definition 1\n: definition 2\n\n' + assert md('
term 1
definition 1
term 2
definition 2
') == '\n\nterm 1\n: definition 1\n\nterm 2\n: definition 2\n\n' + assert md('
term

line 1

line 2

') == '\n\nterm\n: > line 1\n >\n > line 2\n\n' + assert md('
term
  1. 1

    • 2a
    • 2b
  2. 3

') == '\n\nterm\n: 1. 1\n\n * 2a\n * 2b\n 2. 3\n\n' def test_del(): @@ -243,6 +245,14 @@ def test_img(): assert md('Alt text') == '![Alt text](/path/to/img.jpg)' +def test_video(): + assert md('') == '[![text](/path/to/img.jpg)](/path/to/video.mp4)' + assert md('') == '[text](/path/to/video.mp4)' + assert md('') == '[text](/path/to/video.mp4)' + assert md('') == '![text](/path/to/img.jpg)' + assert md('') == 'text' + + def test_kbd(): inline_tests('kbd', '`') diff --git a/tests/test_tables.py b/tests/test_tables.py index e41b389..7e0670c 100644 --- a/tests/test_tables.py +++ b/tests/test_tables.py @@ -267,6 +267,23 @@ table_with_undefined_colspan = """
""" +table_with_colspan_missing_head = """ + + + + + + + + + + + + + + +
NameAge
JillSmith50
EveJackson94
""" + def test_table(): assert md(table) == '\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' @@ -283,6 +300,7 @@ def test_table(): assert md(table_with_caption) == 'TEXT\n\nCaption\n\n| | | |\n| --- | --- | --- |\n| Firstname | Lastname | Age |\n\n' assert md(table_with_colspan) == '\n\n| Name | | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' assert md(table_with_undefined_colspan) == '\n\n| Name | Age |\n| --- | --- |\n| Jill | Smith |\n\n' + assert md(table_with_colspan_missing_head) == '\n\n| | | |\n| --- | --- | --- |\n| Name | | Age |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' def test_table_infer_header(): @@ -300,3 +318,4 @@ def test_table_infer_header(): assert md(table_with_caption, table_infer_header=True) == 'TEXT\n\nCaption\n\n| Firstname | Lastname | Age |\n| --- | --- | --- |\n\n' assert md(table_with_colspan, table_infer_header=True) == '\n\n| Name | | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n' assert md(table_with_undefined_colspan, table_infer_header=True) == '\n\n| Name | Age |\n| --- | --- |\n| Jill | Smith |\n\n' + assert md(table_with_colspan_missing_head, table_infer_header=True) == '\n\n| Name | | Age |\n| --- | --- | --- |\n| Jill | Smith | 50 |\n| Eve | Jackson | 94 |\n\n'