From 3354f143d81d5774f0250b7d5e4f50e3d3d8cd12 Mon Sep 17 00:00:00 2001 From: Andrew Richards Date: Mon, 23 Nov 2020 16:16:10 +0000 Subject: [PATCH 1/8] Add method for tag Add method and tests for inline tag . --- markdownify/__init__.py | 12 ++++++++++++ tests/test_advanced.py | 8 ++++++++ tests/test_conversions.py | 21 +++++++++++++++++++++ 3 files changed, 41 insertions(+) diff --git a/markdownify/__init__.py b/markdownify/__init__.py index 2d5daf1..44d729d 100644 --- a/markdownify/__init__.py +++ b/markdownify/__init__.py @@ -143,6 +143,18 @@ class MarkdownConverter(object): return '' return '%s*%s*%s' % (prefix, text, suffix) + def convert_code(self, el, text): + prefix, suffix, text = chomp(text) + if not text: + return '' + return '%s`%s`%s' % (prefix, text, suffix) + + def convert_samp(self, el, text): + return self.convert_code(el, text) + + def convert_kbd(self, el, text): + return self.convert_code(el, text) + def convert_hn(self, n, el, text): style = self.options['heading_style'] text = text.rstrip() diff --git a/tests/test_advanced.py b/tests/test_advanced.py index 4c480d7..7694fa1 100644 --- a/tests/test_advanced.py +++ b/tests/test_advanced.py @@ -4,3 +4,11 @@ from markdownify import markdownify as md def test_nested(): text = md('

This is an example link.

') assert text == 'This is an [example link](http://example.com/).\n\n' + + +def test_code_with_tricky_content(): + assert md('>') == "`>`" + assert md('/home/username') == "`/home/`**username**" + # convert_br() adds trailing spaces (why?); ignore them by using 2 tests, + assert md('Line1
Line2
').startswith("`Line1") + assert md('Line1
Line2
').endswith("\nLine2`") diff --git a/tests/test_conversions.py b/tests/test_conversions.py index 12ce36b..5b7db1f 100644 --- a/tests/test_conversions.py +++ b/tests/test_conversions.py @@ -94,6 +94,27 @@ def test_em_spaces(): assert md('foo bar') == 'foo bar' +def code_samp_kbd_tests(tag): + # Basically re-use test_em() and test_em_spaces(), + assert md(f'<{tag}>Hello') == '`Hello`' + assert md(f'foo <{tag}>Hello bar') == 'foo `Hello` bar' + assert md(f'foo<{tag}> Hello bar') == 'foo `Hello` bar' + assert md(f'foo <{tag}>Hello bar') == 'foo `Hello` bar' + assert md(f'foo <{tag}> bar') in ['foo bar', 'foo bar'] # Either is OK + + +def test_code(): + code_samp_kbd_tests('code') + + +def test_samp(): + code_samp_kbd_tests('samp') + + +def test_kbd(): + code_samp_kbd_tests('kbd') + + def test_h1(): assert md('

Hello

') == 'Hello\n=====\n\n' From 92a73c8dfe4f84094bf892988797a616ef9d83f0 Mon Sep 17 00:00:00 2001 From: Andrew Richards Date: Thu, 26 Nov 2020 22:20:29 +0000 Subject: [PATCH 2/8] Correct test_code_with_tricky_content() Result of previous test didn't check for the trailing ' ' that convert_br() adds: This is needed to ensure that the resulting markdown not only has \n for the
but also renders it as a newline. --- tests/test_advanced.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/test_advanced.py b/tests/test_advanced.py index 7694fa1..585e868 100644 --- a/tests/test_advanced.py +++ b/tests/test_advanced.py @@ -9,6 +9,5 @@ def test_nested(): def test_code_with_tricky_content(): assert md('>') == "`>`" assert md('/home/username') == "`/home/`**username**" - # convert_br() adds trailing spaces (why?); ignore them by using 2 tests, - assert md('Line1
Line2
').startswith("`Line1") - assert md('Line1
Line2
').endswith("\nLine2`") + assert md('First line blah blah
blah blah
second line') \ + == "First line `blah blah \nblah blah` second line" From 7685738344a27deeaa6148cd1cd117d69cb35967 Mon Sep 17 00:00:00 2001 From: Andrew Richards Date: Fri, 27 Nov 2020 14:18:08 +0000 Subject: [PATCH 3/8] Formatting tweak Change indent of continuation line; squashes a flake8 warning. --- tests/test_advanced.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_advanced.py b/tests/test_advanced.py index 585e868..7b62836 100644 --- a/tests/test_advanced.py +++ b/tests/test_advanced.py @@ -10,4 +10,4 @@ def test_code_with_tricky_content(): assert md('>') == "`>`" assert md('/home/username') == "`/home/`**username**" assert md('First line blah blah
blah blah
second line') \ - == "First line `blah blah \nblah blah` second line" + == "First line `blah blah \nblah blah` second line" From 89b577e91efa61df0cc4ca9a7672d7d24d27f1f7 Mon Sep 17 00:00:00 2001 From: AlexVonB Date: Fri, 21 May 2021 12:21:21 +0200 Subject: [PATCH 4/8] ordering functions alphabetically --- markdownify/__init__.py | 56 ++++++++++++++++++++--------------------- 1 file changed, 27 insertions(+), 29 deletions(-) diff --git a/markdownify/__init__.py b/markdownify/__init__.py index 4e442bd..7271c69 100644 --- a/markdownify/__init__.py +++ b/markdownify/__init__.py @@ -198,6 +198,12 @@ class MarkdownConverter(object): else: return ' \n' + def convert_code(self, el, text, convert_as_inline): + prefix, suffix, text = chomp(text) + if not text: + return '' + return '%s`%s`%s' % (prefix, text, suffix) + def convert_em(self, el, text, convert_as_inline): em_tag = self.options['strong_em_symbol'] prefix, suffix, text = chomp(text) @@ -205,17 +211,7 @@ class MarkdownConverter(object): return '' return '%s%s%s%s%s' % (prefix, em_tag, text, em_tag, suffix) - def convert_code(self, el, text, convert_as_inline): - prefix, suffix, text = chomp(text) - if not text: - return '' - return '%s`%s`%s' % (prefix, text, suffix) - - def convert_samp(self, el, text, convert_as_inline): - return self.convert_code(el, text, convert_as_inline) - - def convert_kbd(self, el, text, convert_as_inline): - return self.convert_code(el, text, convert_as_inline) + convert_kbd = convert_code def convert_hn(self, n, el, text, convert_as_inline): if convert_as_inline: @@ -231,9 +227,22 @@ class MarkdownConverter(object): return '%s %s %s\n\n' % (hashes, text, hashes) return '%s %s\n\n' % (hashes, text) + def convert_hr(self, el, text, convert_as_inline): + return '\n\n---\n\n' + def convert_i(self, el, text, convert_as_inline): return self.convert_em(el, text, convert_as_inline) + def convert_img(self, el, text, convert_as_inline): + alt = el.attrs.get('alt', None) or '' + src = el.attrs.get('src', None) or '' + title = el.attrs.get('title', None) or '' + title_part = ' "%s"' % title.replace('"', r'\"') if title else '' + if convert_as_inline: + return alt + + return '![%s](%s%s)' % (alt, src, title_part) + def convert_list(self, el, text, convert_as_inline): # Converting a list to inline is undefined. @@ -286,19 +295,17 @@ class MarkdownConverter(object): return '' return '%s%s%s%s%s' % (prefix, strong_tag, text, strong_tag, suffix) - def convert_img(self, el, text, convert_as_inline): - alt = el.attrs.get('alt', None) or '' - src = el.attrs.get('src', None) or '' - title = el.attrs.get('title', None) or '' - title_part = ' "%s"' % title.replace('"', r'\"') if title else '' - if convert_as_inline: - return alt - - return '![%s](%s%s)' % (alt, src, title_part) + convert_samp = convert_code def convert_table(self, el, text, convert_as_inline): return '\n\n' + text + '\n' + def convert_td(self, el, text, convert_as_inline): + return ' ' + text + ' |' + + def convert_th(self, el, text, convert_as_inline): + return ' ' + text + ' |' + def convert_tr(self, el, text, convert_as_inline): cells = el.find_all(['td', 'th']) is_headrow = all([cell.name == 'th' for cell in cells]) @@ -314,15 +321,6 @@ class MarkdownConverter(object): overline += '| ' + ' | '.join(['---'] * len(cells)) + ' |' + '\n' return overline + '|' + text + '\n' + underline - def convert_th(self, el, text, convert_as_inline): - return ' ' + text + ' |' - - def convert_td(self, el, text, convert_as_inline): - return ' ' + text + ' |' - - def convert_hr(self, el, text, convert_as_inline): - return '\n\n---\n\n' - def markdownify(html, **options): return MarkdownConverter(**options).convert(html) From 079f32f6cd2790e846339189dad3e5dd9c2e5832 Mon Sep 17 00:00:00 2001 From: AlexVonB Date: Fri, 21 May 2021 12:27:49 +0200 Subject: [PATCH 5/8] added del and s tags --- markdownify/__init__.py | 8 ++++++++ tests/test_conversions.py | 24 ++++++++++++++++-------- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/markdownify/__init__.py b/markdownify/__init__.py index 7271c69..9aa9b54 100644 --- a/markdownify/__init__.py +++ b/markdownify/__init__.py @@ -204,6 +204,12 @@ class MarkdownConverter(object): return '' return '%s`%s`%s' % (prefix, text, suffix) + def convert_del(self, el, text, convert_as_inline): + prefix, suffix, text = chomp(text) + if not text: + return '' + return '%s~~%s~~%s' % (prefix, text, suffix) + def convert_em(self, el, text, convert_as_inline): em_tag = self.options['strong_em_symbol'] prefix, suffix, text = chomp(text) @@ -288,6 +294,8 @@ class MarkdownConverter(object): return text return '%s\n\n' % text if text else '' + convert_s = convert_del + def convert_strong(self, el, text, convert_as_inline): strong_tag = 2 * self.options['strong_em_symbol'] prefix, suffix, text = chomp(text) diff --git a/tests/test_conversions.py b/tests/test_conversions.py index b3b4233..9642fcf 100644 --- a/tests/test_conversions.py +++ b/tests/test_conversions.py @@ -240,25 +240,33 @@ def test_em_spaces(): assert md('foo bar') == 'foo bar' -def code_samp_kbd_tests(tag): +def inline_tests(tag, markup): # Basically re-use test_em() and test_em_spaces(), - assert md(f'<{tag}>Hello') == '`Hello`' - assert md(f'foo <{tag}>Hello bar') == 'foo `Hello` bar' - assert md(f'foo<{tag}> Hello bar') == 'foo `Hello` bar' - assert md(f'foo <{tag}>Hello bar') == 'foo `Hello` bar' + assert md(f'<{tag}>Hello') == f'{markup}Hello{markup}' + assert md(f'foo <{tag}>Hello bar') == f'foo {markup}Hello{markup} bar' + assert md(f'foo<{tag}> Hello bar') == f'foo {markup}Hello{markup} bar' + assert md(f'foo <{tag}>Hello bar') == f'foo {markup}Hello{markup} bar' assert md(f'foo <{tag}> bar') in ['foo bar', 'foo bar'] # Either is OK def test_code(): - code_samp_kbd_tests('code') + inline_tests('code', '`') def test_samp(): - code_samp_kbd_tests('samp') + inline_tests('samp', '`') def test_kbd(): - code_samp_kbd_tests('kbd') + inline_tests('kbd', '`') + + +def test_del(): + inline_tests('del', '~~') + + +def test_s(): + inline_tests('s', '~~') def test_h1(): From 91d53ddd5a0d27d9caa51bfd2b43a5af8414c6bd Mon Sep 17 00:00:00 2001 From: AlexVonB Date: Fri, 21 May 2021 13:53:00 +0200 Subject: [PATCH 6/8] refactor simple inline conversions --- markdownify/__init__.py | 49 +++++++++++++++++++---------------------- 1 file changed, 23 insertions(+), 26 deletions(-) diff --git a/markdownify/__init__.py b/markdownify/__init__.py index 9aa9b54..22f63e7 100644 --- a/markdownify/__init__.py +++ b/markdownify/__init__.py @@ -44,6 +44,23 @@ def chomp(text): return (prefix, suffix, text) +def abstract_inline_conversion(markup_fn): + """ + This abstracts all simple inline tags like b, em, del, ... + Returns a function that wraps the chomped text in a pair of the string + that is returned by markup_fn. markup_fn is necessary to allow for + references to self.strong_em_symbol etc. + """ + def implementation(self, el, text, convert_as_inline): + markup = markup_fn(self) + prefix, suffix, text = chomp(text) + if not text: + return '' + return '%s%s%s%s%s' % (prefix, markup, text, markup, suffix) + return implementation + + + def _todict(obj): return dict((k, getattr(obj, k)) for k in dir(obj) if not k.startswith('_')) @@ -179,8 +196,7 @@ class MarkdownConverter(object): title_part = ' "%s"' % title.replace('"', r'\"') if title else '' return '%s[%s](%s%s)%s' % (prefix, text, href, title_part, suffix) if href else text - def convert_b(self, el, text, convert_as_inline): - return self.convert_strong(el, text, convert_as_inline) + convert_b = abstract_inline_conversion(lambda self: 2 * self.options['strong_em_symbol']) def convert_blockquote(self, el, text, convert_as_inline): @@ -198,24 +214,11 @@ class MarkdownConverter(object): else: return ' \n' - def convert_code(self, el, text, convert_as_inline): - prefix, suffix, text = chomp(text) - if not text: - return '' - return '%s`%s`%s' % (prefix, text, suffix) + convert_code = abstract_inline_conversion(lambda self: '`') - def convert_del(self, el, text, convert_as_inline): - prefix, suffix, text = chomp(text) - if not text: - return '' - return '%s~~%s~~%s' % (prefix, text, suffix) + convert_del = abstract_inline_conversion(lambda self: '~~') - def convert_em(self, el, text, convert_as_inline): - em_tag = self.options['strong_em_symbol'] - prefix, suffix, text = chomp(text) - if not text: - return '' - return '%s%s%s%s%s' % (prefix, em_tag, text, em_tag, suffix) + convert_em = abstract_inline_conversion(lambda self: self.options['strong_em_symbol']) convert_kbd = convert_code @@ -236,8 +239,7 @@ class MarkdownConverter(object): def convert_hr(self, el, text, convert_as_inline): return '\n\n---\n\n' - def convert_i(self, el, text, convert_as_inline): - return self.convert_em(el, text, convert_as_inline) + convert_i = convert_em def convert_img(self, el, text, convert_as_inline): alt = el.attrs.get('alt', None) or '' @@ -296,12 +298,7 @@ class MarkdownConverter(object): convert_s = convert_del - def convert_strong(self, el, text, convert_as_inline): - strong_tag = 2 * self.options['strong_em_symbol'] - prefix, suffix, text = chomp(text) - if not text: - return '' - return '%s%s%s%s%s' % (prefix, strong_tag, text, strong_tag, suffix) + convert_strong = convert_b convert_samp = convert_code From 70ef9b6e481dd3e0db267f4ea1104250810e0a11 Mon Sep 17 00:00:00 2001 From: AlexVonB Date: Fri, 21 May 2021 14:15:41 +0200 Subject: [PATCH 7/8] added pre tag closes #15 --- markdownify/__init__.py | 25 +++++++++++++++++++++---- tests/test_conversions.py | 5 +++++ 2 files changed, 26 insertions(+), 4 deletions(-) diff --git a/markdownify/__init__.py b/markdownify/__init__.py index 22f63e7..9a30b34 100644 --- a/markdownify/__init__.py +++ b/markdownify/__init__.py @@ -60,7 +60,6 @@ def abstract_inline_conversion(markup_fn): return implementation - def _todict(obj): return dict((k, getattr(obj, k)) for k in dir(obj) if not k.startswith('_')) @@ -141,12 +140,21 @@ class MarkdownConverter(object): def process_text(self, el): text = six.text_type(el) + + # dont remove any whitespace when handling pre or code in pre + if (el.parent.name == 'pre' + or (el.parent.name == 'code' and el.parent.parent.name == 'pre')): + return escape(text or '') + + cleaned_text = escape(whitespace_re.sub(' ', text or '')) + # remove trailing whitespaces if any of the following condition is true: # - current text node is the last node in li # - current text node is followed by an embedded list if el.parent.name == 'li' and (not el.next_sibling or el.next_sibling.name in ['ul', 'ol']): - return escape(all_whitespace_re.sub(' ', text or '')).rstrip() - return escape(whitespace_re.sub(' ', text or '')) + return cleaned_text.rstrip() + + return cleaned_text def __getattr__(self, attr): # Handle headings @@ -214,7 +222,11 @@ class MarkdownConverter(object): else: return ' \n' - convert_code = abstract_inline_conversion(lambda self: '`') + def convert_code(self, el, text, convert_as_inline): + if el.parent.name == 'pre': + return text + converter = abstract_inline_conversion(lambda self: '`') + return converter(self, el, text, convert_as_inline) convert_del = abstract_inline_conversion(lambda self: '~~') @@ -296,6 +308,11 @@ class MarkdownConverter(object): return text return '%s\n\n' % text if text else '' + def convert_pre(self, el, text, convert_as_inline): + if not text: + return '' + return '\n```\n%s\n```\n' % text + convert_s = convert_del convert_strong = convert_b diff --git a/tests/test_conversions.py b/tests/test_conversions.py index 9642fcf..354212b 100644 --- a/tests/test_conversions.py +++ b/tests/test_conversions.py @@ -261,6 +261,11 @@ def test_kbd(): inline_tests('kbd', '`') +def test_pre(): + assert md('
test\n    foo\nbar
') == '\n```\ntest\n foo\nbar\n```\n' + assert md('
test\n    foo\nbar
') == '\n```\ntest\n foo\nbar\n```\n' + + def test_del(): inline_tests('del', '~~') From 9cb940cbc003a6048cb4ad2be17deceb6f0659f1 Mon Sep 17 00:00:00 2001 From: AlexVonB Date: Fri, 21 May 2021 14:17:51 +0200 Subject: [PATCH 8/8] bump to v0.8.0 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 87bd84e..04dbb80 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ read = lambda filepath: codecs.open(filepath, 'r', 'utf-8').read() pkgmeta = { '__title__': 'markdownify', '__author__': 'Matthew Tretter', - '__version__': '0.7.4', + '__version__': '0.8.0', }