From f093843f4018c39f82eda6ee327f626071bf3db5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20van=20Delft?= Date: Mon, 15 Feb 2021 16:19:19 +0100 Subject: [PATCH 01/12] Allow for a custom strong or emphasis symbol --- markdownify/__init__.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/markdownify/__init__.py b/markdownify/__init__.py index 5c008d3..4542f9a 100644 --- a/markdownify/__init__.py +++ b/markdownify/__init__.py @@ -46,6 +46,7 @@ class MarkdownConverter(object): autolinks = True heading_style = UNDERLINED bullets = '*+-' # An iterable of bullet types. + strong_em_symbol = '*' class Options(DefaultOptions): pass @@ -157,10 +158,11 @@ class MarkdownConverter(object): return ' \n' def convert_em(self, el, text, convert_as_inline): + em_tag = self.options['strong_em_symbol'] prefix, suffix, text = chomp(text) if not text: return '' - return '%s*%s*%s' % (prefix, text, suffix) + return '%s%s%s%s%s' % (prefix, em_tag, text, em_tag, suffix) def convert_hn(self, n, el, text, convert_as_inline): if convert_as_inline: @@ -222,10 +224,11 @@ class MarkdownConverter(object): return '%s\n\n' % text if text else '' def convert_strong(self, el, text, convert_as_inline): + strong_tag = 2 * self.options['strong_em_symbol'] prefix, suffix, text = chomp(text) if not text: return '' - return '%s**%s**%s' % (prefix, text, suffix) + return '%s%s%s%s%s' % (prefix, strong_tag, text, strong_tag, suffix) def convert_img(self, el, text, convert_as_inline): alt = el.attrs.get('alt', None) or '' From b3ac4606a6697c97a08c09dc3c54d98af84eaf59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20van=20Delft?= Date: Mon, 15 Feb 2021 16:29:14 +0100 Subject: [PATCH 02/12] Allow for the use of backslash for newlines --- markdownify/__init__.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/markdownify/__init__.py b/markdownify/__init__.py index 4542f9a..59aa694 100644 --- a/markdownify/__init__.py +++ b/markdownify/__init__.py @@ 
-47,6 +47,7 @@ class MarkdownConverter(object): heading_style = UNDERLINED bullets = '*+-' # An iterable of bullet types. strong_em_symbol = '*' + newline = 'spaces' class Options(DefaultOptions): pass @@ -155,7 +156,10 @@ class MarkdownConverter(object): if convert_as_inline: return "" - return ' \n' + if self.options['newline'] == 'backslash': + return '\\\n' + else: + return ' \n' def convert_em(self, el, text, convert_as_inline): em_tag = self.options['strong_em_symbol'] From 29a4e551f772fb83f2c22d166bdce2ab24485de0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20van=20Delft?= Date: Mon, 15 Feb 2021 16:37:13 +0100 Subject: [PATCH 03/12] Update README with the two new options --- README.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.rst b/README.rst index 4d21411..0988ef1 100644 --- a/README.rst +++ b/README.rst @@ -75,6 +75,12 @@ bullets lists are nested. Otherwise, the bullet will alternate based on nesting level. Defaults to ``'*+-'``. +strong_em_symbol + In markdown, both `*` and `_` are used to encode **strong** or *emphasized* texts. The preferred symbol can be passed through this argument, which defaults to `*`. + +newline + Defines the style of marking linebreaks (`
`) in markdown. The default value `'spaces'` of this option means the regular ' \n' will be used (i.e. two spaces and a newline), while `'backslash'` will convert a linebreak to `''\\\n'` (a backslash an a newline). While the latter convention is non-standard, it is commonly preferred and supported by a lot of converters. + Options may be specified as kwargs to the ``markdownify`` function, or as a nested ``Options`` class in ``MarkdownConverter`` subclasses. From a79ed44ec38dc71e9739c5e95463877f4d9fb788 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20van=20Delft?= Date: Mon, 15 Feb 2021 16:51:20 +0100 Subject: [PATCH 04/12] Fix code ticks in README --- README.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 0988ef1..eac2a00 100644 --- a/README.rst +++ b/README.rst @@ -76,10 +76,10 @@ bullets level. Defaults to ``'*+-'``. strong_em_symbol - In markdown, both `*` and `_` are used to encode **strong** or *emphasized* texts. The preferred symbol can be passed through this argument, which defaults to `*`. + In markdown, both ``*`` and ``_`` are used to encode **strong** or *emphasized* texts. The preferred symbol can be passed through this argument, that defaults to ``*``. newline - Defines the style of marking linebreaks (`
`) in markdown. The default value `'spaces'` of this option means the regular ' \n' will be used (i.e. two spaces and a newline), while `'backslash'` will convert a linebreak to `''\\\n'` (a backslash an a newline). While the latter convention is non-standard, it is commonly preferred and supported by a lot of converters. + Defines the style of marking linebreaks (``
``) in markdown. The default value ``'spaces'`` of this option means the regular `` \n`` will be used (i.e. two spaces and a newline), while ``'backslash'`` will convert a linebreak to ``\\n`` (a backslash an a newline). While the latter convention is non-standard, it is commonly preferred and supported by a lot of interpreters. Options may be specified as kwargs to the ``markdownify`` function, or as a nested ``Options`` class in ``MarkdownConverter`` subclasses. From 8da0bdf998d6792016e8ef96fc6452a4be15b6dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20van=20Delft?= Date: Mon, 5 Apr 2021 10:28:46 +0200 Subject: [PATCH 05/12] Test strong_em_symbol --- tests/test_conversions.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/test_conversions.py b/tests/test_conversions.py index edaefbc..fef2203 100644 --- a/tests/test_conversions.py +++ b/tests/test_conversions.py @@ -65,6 +65,7 @@ def test_a_no_autolinks(): def test_b(): assert md('Hello') == '**Hello**' + assert md('Hello', strong_em_symbol='_') == '__Hello__' def test_b_spaces(): @@ -89,6 +90,7 @@ def test_br(): def test_em(): assert md('Hello') == '*Hello*' + assert md('Hello', strong_em_symbol='_') == '_Hello_' def test_em_spaces(): @@ -174,6 +176,7 @@ def test_atx_closed_headings(): def test_i(): assert md('Hello') == '*Hello*' + assert md('Hello', strong_em_symbol='_') == '_Hello_' def test_ol(): @@ -187,6 +190,7 @@ def test_p(): def test_strong(): assert md('Hello') == '**Hello**' + assert md('Hello', strong_em_symbol='_') == '__Hello__' def test_ul(): From c04ec855dd5c4ed3697d219d99209da4528fa3f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20van=20Delft?= Date: Mon, 5 Apr 2021 10:44:20 +0200 Subject: [PATCH 06/12] Change option to newline_style and use variables like heading_style does --- README.rst | 4 ++-- markdownify/__init__.py | 7 +++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/README.rst b/README.rst index eac2a00..6ab4911 100644 --- a/README.rst +++ 
b/README.rst @@ -78,8 +78,8 @@ bullets strong_em_symbol In markdown, both ``*`` and ``_`` are used to encode **strong** or *emphasized* texts. The preferred symbol can be passed through this argument, that defaults to ``*``. -newline - Defines the style of marking linebreaks (``
``) in markdown. The default value ``'spaces'`` of this option means the regular `` \n`` will be used (i.e. two spaces and a newline), while ``'backslash'`` will convert a linebreak to ``\\n`` (a backslash an a newline). While the latter convention is non-standard, it is commonly preferred and supported by a lot of interpreters. +newline_style + Defines the style of marking linebreaks (``
``) in markdown. The default value ``SPACES`` of this option means the regular `` \n`` will be used (i.e. two spaces and a newline), while ``BACKSLASH`` will convert a linebreak to ``\\n`` (a backslash an a newline). While the latter convention is non-standard, it is commonly preferred and supported by a lot of interpreters. Options may be specified as kwargs to the ``markdownify`` function, or as a nested ``Options`` class in ``MarkdownConverter`` subclasses. diff --git a/markdownify/__init__.py b/markdownify/__init__.py index 59aa694..5aa6f91 100644 --- a/markdownify/__init__.py +++ b/markdownify/__init__.py @@ -15,6 +15,9 @@ ATX_CLOSED = 'atx_closed' UNDERLINED = 'underlined' SETEXT = UNDERLINED +# Newline style +SPACES = 'spaces' +BACKSLASH = 'backslash' def escape(text): if not text: @@ -47,7 +50,7 @@ class MarkdownConverter(object): heading_style = UNDERLINED bullets = '*+-' # An iterable of bullet types. strong_em_symbol = '*' - newline = 'spaces' + newline_style = SPACES class Options(DefaultOptions): pass @@ -156,7 +159,7 @@ class MarkdownConverter(object): if convert_as_inline: return "" - if self.options['newline'] == 'backslash': + if self.options['newline_style'] == BACKSLASH: return '\\\n' else: return ' \n' From 16dbc471b989847eeae685a0e26fc924e23cc174 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20van=20Delft?= Date: Mon, 5 Apr 2021 10:47:55 +0200 Subject: [PATCH 07/12] Test newline_style --- tests/test_conversions.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_conversions.py b/tests/test_conversions.py index fef2203..5f2ada7 100644 --- a/tests/test_conversions.py +++ b/tests/test_conversions.py @@ -1,4 +1,4 @@ -from markdownify import markdownify as md, ATX, ATX_CLOSED +from markdownify import markdownify as md, ATX, ATX_CLOSED, BACKSLASH import re @@ -86,6 +86,7 @@ def test_nested_blockquote(): def test_br(): assert md('a
b
c') == 'a \nb \nc' + assert md('a
b
c', newline_style=BACKSLASH) == 'a\\\nb\\\nc' def test_em(): From 7ee87b1d3260250e2654c18056b1c55e18ca009e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20van=20Delft?= Date: Mon, 5 Apr 2021 10:50:23 +0200 Subject: [PATCH 08/12] Use .lower() on _style option fetching --- markdownify/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/markdownify/__init__.py b/markdownify/__init__.py index 5aa6f91..a23964c 100644 --- a/markdownify/__init__.py +++ b/markdownify/__init__.py @@ -159,7 +159,7 @@ class MarkdownConverter(object): if convert_as_inline: return "" - if self.options['newline_style'] == BACKSLASH: + if self.options['newline_style'].lower() == BACKSLASH: return '\\\n' else: return ' \n' @@ -175,7 +175,7 @@ class MarkdownConverter(object): if convert_as_inline: return text - style = self.options['heading_style'] + style = self.options['heading_style'].lower() text = text.rstrip() if style == UNDERLINED and n <= 2: line = '=' if n == 1 else '-' From 650f377b645b85c460caba99cde0e91bb76a90e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20van=20Delft?= Date: Mon, 5 Apr 2021 11:13:19 +0200 Subject: [PATCH 09/12] Fix linting --- markdownify/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/markdownify/__init__.py b/markdownify/__init__.py index a23964c..08819aa 100644 --- a/markdownify/__init__.py +++ b/markdownify/__init__.py @@ -19,6 +19,7 @@ SETEXT = UNDERLINED SPACES = 'spaces' BACKSLASH = 'backslash' + def escape(text): if not text: return '' From 5580b0b51d57d21d99e9cf27cbdaff15e72fbcf5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20van=20Delft?= Date: Mon, 5 Apr 2021 11:13:52 +0200 Subject: [PATCH 10/12] Update README.rst --- README.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.rst b/README.rst index 6ab4911..a19fabe 100644 --- a/README.rst +++ b/README.rst @@ -79,7 +79,7 @@ strong_em_symbol In markdown, both ``*`` and ``_`` are used to encode **strong** or *emphasized* 
texts. The preferred symbol can be passed through this argument, that defaults to ``*``. newline_style - Defines the style of marking linebreaks (``
``) in markdown. The default value ``SPACES`` of this option means the regular `` \n`` will be used (i.e. two spaces and a newline), while ``BACKSLASH`` will convert a linebreak to ``\\n`` (a backslash an a newline). While the latter convention is non-standard, it is commonly preferred and supported by a lot of interpreters. + Defines the style of marking linebreaks (``
``) in markdown. The default value ``SPACES`` of this option will adopt the usual two spaces and a newline, while ``BACKSLASH`` will convert a linebreak to ``\\n`` (a backslash an a newline). While the latter convention is non-standard, it is commonly preferred and supported by a lot of interpreters. Options may be specified as kwargs to the ``markdownify`` function, or as a nested ``Options`` class in ``MarkdownConverter`` subclasses. From e877602a5e988a665b9160158a020f8f825233c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20van=20Delft?= Date: Mon, 5 Apr 2021 11:28:42 +0200 Subject: [PATCH 11/12] Separate the strong_em_symbol and newline style tests --- tests/test_conversions.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/tests/test_conversions.py b/tests/test_conversions.py index 5f2ada7..d07d487 100644 --- a/tests/test_conversions.py +++ b/tests/test_conversions.py @@ -65,7 +65,6 @@ def test_a_no_autolinks(): def test_b(): assert md('Hello') == '**Hello**' - assert md('Hello', strong_em_symbol='_') == '__Hello__' def test_b_spaces(): @@ -86,12 +85,10 @@ def test_nested_blockquote(): def test_br(): assert md('a
b
c') == 'a \nb \nc' - assert md('a
b
c', newline_style=BACKSLASH) == 'a\\\nb\\\nc' def test_em(): assert md('Hello') == '*Hello*' - assert md('Hello', strong_em_symbol='_') == '_Hello_' def test_em_spaces(): @@ -177,7 +174,6 @@ def test_atx_closed_headings(): def test_i(): assert md('Hello') == '*Hello*' - assert md('Hello', strong_em_symbol='_') == '_Hello_' def test_ol(): @@ -191,7 +187,6 @@ def test_p(): def test_strong(): assert md('Hello') == '**Hello**' - assert md('Hello', strong_em_symbol='_') == '__Hello__' def test_ul(): @@ -221,3 +216,14 @@ def test_img(): def test_div(): assert md('Hello World') == 'Hello World' + + +def test_strong_em_symbol(): + assert md('Hello', strong_em_symbol='_') == '__Hello__' + assert md('Hello', strong_em_symbol='_') == '__Hello__' + assert md('Hello', strong_em_symbol='_') == '_Hello_' + assert md('Hello', strong_em_symbol='_') == '_Hello_' + + +def test_newline_style(): + assert md('a
b
c', newline_style=BACKSLASH) == 'a\\\nb\\\nc' From 29c794e17d8a04ff879ac2c6e520d74b12e0e250 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andr=C3=A9=20van=20Delft?= Date: Sun, 18 Apr 2021 18:13:29 +0200 Subject: [PATCH 12/12] Introduce OPTIONs for `strong_em_symbol` --- README.rst | 10 ++++++++-- markdownify/__init__.py | 6 +++++- tests/test_conversions.py | 10 +++++----- 3 files changed, 18 insertions(+), 8 deletions(-) diff --git a/README.rst b/README.rst index a19fabe..1e245c1 100644 --- a/README.rst +++ b/README.rst @@ -76,10 +76,16 @@ bullets level. Defaults to ``'*+-'``. strong_em_symbol - In markdown, both ``*`` and ``_`` are used to encode **strong** or *emphasized* texts. The preferred symbol can be passed through this argument, that defaults to ``*``. + In markdown, both ``*`` and ``_`` are used to encode **strong** or + *emphasized* texts. Either of these symbols can be chosen by the options + ``ASTERISK`` (default) or ``UNDERSCORE`` respectively. newline_style - Defines the style of marking linebreaks (``
``) in markdown. The default value ``SPACES`` of this option will adopt the usual two spaces and a newline, while ``BACKSLASH`` will convert a linebreak to ``\\n`` (a backslash an a newline). While the latter convention is non-standard, it is commonly preferred and supported by a lot of interpreters. + Defines the style of marking linebreaks (``
``) in markdown. The default + value ``SPACES`` of this option will adopt the usual two spaces and a newline, + while ``BACKSLASH`` will convert a linebreak to ``\\n`` (a backslash and a + newline). While the latter convention is non-standard, it is commonly + preferred and supported by a lot of interpreters. Options may be specified as kwargs to the ``markdownify`` function, or as a nested ``Options`` class in ``MarkdownConverter`` subclasses. diff --git a/markdownify/__init__.py b/markdownify/__init__.py index 08819aa..6f90d73 100644 --- a/markdownify/__init__.py +++ b/markdownify/__init__.py @@ -19,6 +19,10 @@ SETEXT = UNDERLINED SPACES = 'spaces' BACKSLASH = 'backslash' +# Strong and emphasis style +ASTERISK = '*' +UNDERSCORE = '_' + def escape(text): if not text: @@ -50,7 +54,7 @@ class MarkdownConverter(object): autolinks = True heading_style = UNDERLINED bullets = '*+-' # An iterable of bullet types. - strong_em_symbol = '*' + strong_em_symbol = ASTERISK newline_style = SPACES class Options(DefaultOptions): diff --git a/tests/test_conversions.py b/tests/test_conversions.py index d07d487..e974c78 100644 --- a/tests/test_conversions.py +++ b/tests/test_conversions.py @@ -1,4 +1,4 @@ -from markdownify import markdownify as md, ATX, ATX_CLOSED, BACKSLASH +from markdownify import markdownify as md, ATX, ATX_CLOSED, BACKSLASH, UNDERSCORE import re @@ -219,10 +219,10 @@ def test_div(): def test_strong_em_symbol(): - assert md('Hello', strong_em_symbol='_') == '__Hello__' - assert md('Hello', strong_em_symbol='_') == '__Hello__' - assert md('Hello', strong_em_symbol='_') == '_Hello_' - assert md('Hello', strong_em_symbol='_') == '_Hello_' + assert md('Hello', strong_em_symbol=UNDERSCORE) == '__Hello__' + assert md('Hello', strong_em_symbol=UNDERSCORE) == '__Hello__' + assert md('Hello', strong_em_symbol=UNDERSCORE) == '_Hello_' + assert md('Hello', strong_em_symbol=UNDERSCORE) == '_Hello_' def test_newline_style():