From 7861b330cd05c0c19fc496530f02922d5493c568 Mon Sep 17 00:00:00 2001
From: Joseph Myers <josmyers@redhat.com>
Date: Sun, 23 Jun 2024 11:28:05 +0000
Subject: [PATCH] Special-case use of HTML tags for converting `<sub>` /
 `<sup>` (#119)

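Allow different strings before / after `<sub>` / `<sup>` content.

When the configured symbol starts with `<` and ends with `>`, it is
treated as an HTML tag: the string emitted after the text is the same
symbol with a `/` inserted after the `<`.

In particular, this allows setting `sub_symbol='<sub>'`,
`sup_symbol='<sup>'`, to use raw HTML in the output when
converting subscripts and superscripts.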
---
 README.rst                | 6 +++++-
 markdownify/__init__.py   | 9 +++++++--
 tests/test_conversions.py | 2 ++
 3 files changed, 14 insertions(+), 3 deletions(-)
diff --git a/README.rst b/README.rst
index 35d58fd..55ea7cf 100644
--- a/README.rst
+++ b/README.rst
@@ -87,7 +87,11 @@ strong_em_symbol
 sub_symbol, sup_symbol
   Define the chars that surround ``<sub>`` and ``<sup>`` text. Defaults to an
   empty string, because this is non-standard behavior. Could be something like
-  ``~`` and ``^`` to result in ``~sub~`` and ``^sup^``.
+  ``~`` and ``^`` to result in ``~sub~`` and ``^sup^``.  If the value starts
+  with ``<`` and ends with ``>``, it is treated as an HTML tag and a ``/`` is
+  inserted after the ``<`` in the string used after the text; this allows
+  specifying ``<sub>`` to use raw HTML in the output for subscripts, for
+  example.
 
 newline_style
   Defines the style of marking linebreaks (``<br>``) in markdown. The default
diff --git a/markdownify/__init__.py b/markdownify/__init__.py
index 6a983d9..d7bd780 100644
--- a/markdownify/__init__.py
+++ b/markdownify/__init__.py
@@ -43,17 +43,22 @@ def abstract_inline_conversion(markup_fn):
     """
     This abstracts all simple inline tags like b, em, del, ...
     Returns a function that wraps the chomped text in a pair of the string
-    that is returned by markup_fn. markup_fn is necessary to allow for
+    that is returned by markup_fn, with '/' inserted in the string used after
+    the text if it looks like an HTML tag. markup_fn is necessary to allow for
     references to self.strong_em_symbol etc.
     """
     def implementation(self, el, text, convert_as_inline):
         markup = markup_fn(self)
+        if markup.startswith('<') and markup.endswith('>'):
+            markup_after = '</' + markup[1:]
+        else:
+            markup_after = markup
         if el.find_parent(['pre', 'code', 'kbd', 'samp']):
             return text
         prefix, suffix, text = chomp(text)
         if not text:
             return ''
-        return '%s%s%s%s%s' % (prefix, markup, text, markup, suffix)
+        return '%s%s%s%s%s' % (prefix, markup, text, markup_after, suffix)
     return implementation
 
 
diff --git a/tests/test_conversions.py b/tests/test_conversions.py
index 9652143..a35b982 100644
--- a/tests/test_conversions.py
+++ b/tests/test_conversions.py
@@ -268,11 +268,13 @@ def test_strong_em_symbol():
 def test_sub():
     assert md('<sub>foo</sub>') == 'foo'
     assert md('<sub>foo</sub>', sub_symbol='~') == '~foo~'
+    assert md('<sub>foo</sub>', sub_symbol='<sub>') == '<sub>foo</sub>'
 
 
 def test_sup():
     assert md('<sup>foo</sup>') == 'foo'
     assert md('<sup>foo</sup>', sup_symbol='^') == '^foo^'
+    assert md('<sup>foo</sup>', sup_symbol='<sup>') == '<sup>foo</sup>'
 
 
 def test_lang():