From 7861b330cd05c0c19fc496530f02922d5493c568 Mon Sep 17 00:00:00 2001 From: Joseph Myers Date: Sun, 23 Jun 2024 11:28:05 +0000 Subject: [PATCH] Special-case use of HTML tags for converting `<sub>` / `<sup>` (#119) Allow different strings before / after `<sub>` / `<sup>` content In particular, this allows setting `sub_symbol='<sub>'`, `sup_symbol='<sup>'`, to use raw HTML in the output when converting subscripts and superscripts. --- README.rst | 6 +++++- markdownify/__init__.py | 9 +++++++-- tests/test_conversions.py | 2 ++ 3 files changed, 14 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index 35d58fd..55ea7cf 100644 --- a/README.rst +++ b/README.rst @@ -87,7 +87,11 @@ strong_em_symbol sub_symbol, sup_symbol Define the chars that surround ``<sub>`` and ``<sup>`` text. Defaults to an empty string, because this is non-standard behavior. Could be something like - ``~`` and ``^`` to result in ``~sub~`` and ``^sup^``. + ``~`` and ``^`` to result in ``~sub~`` and ``^sup^``. If the value starts + with ``<`` and ends with ``>``, it is treated as an HTML tag and a ``/`` is + inserted after the ``<`` in the string used after the text; this allows + specifying ``<sub>`` to use raw HTML in the output for subscripts, for + example. newline_style Defines the style of marking linebreaks (``<br>
``) in markdown. The default diff --git a/markdownify/__init__.py b/markdownify/__init__.py index 6a983d9..d7bd780 100644 --- a/markdownify/__init__.py +++ b/markdownify/__init__.py @@ -43,17 +43,22 @@ def abstract_inline_conversion(markup_fn): """ This abstracts all simple inline tags like b, em, del, ... Returns a function that wraps the chomped text in a pair of the string - that is returned by markup_fn. markup_fn is necessary to allow for + that is returned by markup_fn, with '/' inserted in the string used after + the text if it looks like an HTML tag. markup_fn is necessary to allow for references to self.strong_em_symbol etc. """ def implementation(self, el, text, convert_as_inline): markup = markup_fn(self) + if markup.startswith('<') and markup.endswith('>'): + markup_after = 'foo
') == 'foo' assert md('foo', sub_symbol='~') == '~foo~' + assert md('foo', sub_symbol='') == 'foo' def test_sup(): assert md('foo') == 'foo' assert md('foo', sup_symbol='^') == '^foo^' + assert md('foo', sup_symbol='') == 'foo' def test_lang():